## Load packages for Bayesian GLM fitting, MCMC diagnostics and summaries.
## Fix: library(ggeffects) was loaded twice; it is now loaded exactly once.
library(tidyverse) # for data wrangling etc
library(rstanarm) # for fitting models in STAN
library(cmdstanr) # for cmdstan
library(brms) # for fitting models in STAN
library(standist) # for exploring distributions
library(HDInterval) # for HPD intervals
library(posterior) # for posterior draws
library(coda) # for diagnostics
library(bayesplot) # for diagnostics
library(ggmcmc) # for MCMC diagnostics
library(DHARMa) # for residual diagnostics
library(rstan) # for interfacing with STAN
library(emmeans) # for marginal means etc
library(broom) # for tidying outputs
library(tidybayes) # for more tidying outputs
library(ggeffects) # for partial (marginal) effect plots
library(broom.mixed) # for summarising models
library(bayestestR) # for ROPE
library(see) # for some plots
library(easystats) # for the easystats ecosystem
library(ggridges) # for ridge plots
library(patchwork) # for multiple plots
library(modelsummary) # for data and model summaries
theme_set(theme_grey()) # put the default ggplot theme back
source("helperFunctions.R")

Bayesian GLM Part6
1 Preparations
Load the necessary libraries
2 Scenario
An ecologist studying a rocky shore at Phillip Island, in southeastern Australia, was interested in how clumps of intertidal mussels are maintained (Quinn 1988). In particular, he wanted to know how densities of adult mussels affected recruitment of young individuals from the plankton. As with most marine invertebrates, recruitment is highly patchy in time, so he expected to find seasonal variation, and the interaction between season and density - whether effects of adult mussel density vary across seasons - was the aspect of most interest.
The data were collected from four seasons, and with two densities of adult mussels. The experiment consisted of clumps of adult mussels attached to the rocks. These clumps were then brought back to the laboratory, and the number of baby mussels recorded. There were 3-6 replicate clumps for each density and season combination.
| SEASON | DENSITY | RECRUITS | SQRTRECRUITS | GROUP |
|---|---|---|---|---|
| Spring | Low | 15 | 3.87 | SpringLow |
| .. | .. | .. | .. | .. |
| Spring | High | 11 | 3.32 | SpringHigh |
| .. | .. | .. | .. | .. |
| Summer | Low | 21 | 4.58 | SummerLow |
| .. | .. | .. | .. | .. |
| Summer | High | 34 | 5.83 | SummerHigh |
| .. | .. | .. | .. | .. |
| Autumn | Low | 14 | 3.74 | AutumnLow |
| .. | .. | .. | .. | .. |
| SEASON | Categorical listing of Season in which mussel clumps were collected independent variable |
| DENSITY | Categorical listing of the density of mussels within mussel clump independent variable |
| RECRUITS | The number of mussel recruits response variable |
| SQRTRECRUITS | Square root transformation of RECRUITS - needed to meet the test assumptions |
| GROUPS | Categorical listing of Season/Density combinations - used for checking ANOVA assumptions |
3 Read in the data
Rows: 42 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): SEASON, DENSITY, GROUP
dbl (2): RECRUITS, SQRTRECRUITS
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 42
Columns: 5
$ SEASON <chr> "Spring", "Spring", "Spring", "Spring", "Spring", "Spring…
$ DENSITY <chr> "Low", "Low", "Low", "Low", "Low", "High", "High", "High"…
$ RECRUITS <dbl> 15, 10, 13, 13, 5, 11, 10, 15, 10, 13, 1, 21, 31, 21, 18,…
$ SQRTRECRUITS <dbl> 3.872983, 3.162278, 3.605551, 3.605551, 2.236068, 3.31662…
$ GROUP <chr> "SpringLow", "SpringLow", "SpringLow", "SpringLow", "Spri…
# A tibble: 6 × 5
SEASON DENSITY RECRUITS SQRTRECRUITS GROUP
<chr> <chr> <dbl> <dbl> <chr>
1 Spring Low 15 3.87 SpringLow
2 Spring Low 10 3.16 SpringLow
3 Spring Low 13 3.61 SpringLow
4 Spring Low 13 3.61 SpringLow
5 Spring Low 5 2.24 SpringLow
6 Spring High 11 3.32 SpringHigh
spc_tbl_ [42 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ SEASON : chr [1:42] "Spring" "Spring" "Spring" "Spring" ...
$ DENSITY : chr [1:42] "Low" "Low" "Low" "Low" ...
$ RECRUITS : num [1:42] 15 10 13 13 5 11 10 15 10 13 ...
$ SQRTRECRUITS: num [1:42] 3.87 3.16 3.61 3.61 2.24 ...
$ GROUP : chr [1:42] "SpringLow" "SpringLow" "SpringLow" "SpringLow" ...
- attr(*, "spec")=
.. cols(
.. SEASON = col_character(),
.. DENSITY = col_character(),
.. RECRUITS = col_double(),
.. SQRTRECRUITS = col_double(),
.. GROUP = col_character()
.. )
- attr(*, "problems")=<externalptr>
quinn (42 rows and 5 variables, 5 shown)
ID | Name | Type | Missings | Values | N
---+--------------+-----------+----------+------------+-----------
1 | SEASON | character | 0 (0.0%) | Autumn | 10 (23.8%)
| | | | Spring | 11 (26.2%)
| | | | Summer | 12 (28.6%)
| | | | Winter | 9 (21.4%)
---+--------------+-----------+----------+------------+-----------
2 | DENSITY | character | 0 (0.0%) | High | 24 (57.1%)
| | | | Low | 18 (42.9%)
---+--------------+-----------+----------+------------+-----------
3 | RECRUITS | numeric | 0 (0.0%) | [0, 69] | 42
---+--------------+-----------+----------+------------+-----------
4 | SQRTRECRUITS | numeric | 0 (0.0%) | [0, 8.31] | 42
---+--------------+-----------+----------+------------+-----------
5 | GROUP | character | 0 (0.0%) | AutumnHigh | 6 (14.3%)
| | | | AutumnLow | 4 ( 9.5%)
| | | | SpringHigh | 6 (14.3%)
| | | | SpringLow | 5 (11.9%)
| | | | SummerHigh | 6 (14.3%)
| | | | SummerLow | 6 (14.3%)
| | | | WinterHigh | 6 (14.3%)
| | | | WinterLow | 3 ( 7.1%)
------------------------------------------------------------------
| Unique | Missing Pct. | Mean | SD | Min | Median | Max | Histogram | |
|---|---|---|---|---|---|---|---|---|
| RECRUITS | 25 | 0 | 18.3 | 15.7 | 0.0 | 13.5 | 69.0 | |
| SQRTRECRUITS | 25 | 0 | 3.9 | 1.9 | 0.0 | 3.7 | 8.3 | |
| N | % | |||||||
| SEASON | Autumn | 10 | 23.8 | |||||
| Spring | 11 | 26.2 | ||||||
| Summer | 12 | 28.6 | ||||||
| Winter | 9 | 21.4 | ||||||
| DENSITY | High | 24 | 57.1 | |||||
| Low | 18 | 42.9 | ||||||
| GROUP | AutumnHigh | 6 | 14.3 | |||||
| AutumnLow | 4 | 9.5 | ||||||
| SpringHigh | 6 | 14.3 | ||||||
| SpringLow | 5 | 11.9 | ||||||
| SummerHigh | 6 | 14.3 | ||||||
| SummerLow | 6 | 14.3 | ||||||
| WinterHigh | 6 | 14.3 | ||||||
| WinterLow | 3 | 7.1 |
## Numeric summaries (mean, SD, range, histogram) of RECRUITS for every
## SEASON x DENSITY combination; SQRTRECRUITS is dropped as it is just a
## transformation of RECRUITS.
quinn |>
  dplyr::select(-SQRTRECRUITS) |>
  modelsummary::datasummary_skim(
    type = "numeric",
    by = c("SEASON", "DENSITY")
)

| SEASON | DENSITY | Unique | Missing Pct. | Mean | SD | Min | Median | Max | Histogram | |
|---|---|---|---|---|---|---|---|---|---|---|
| RECRUITS | Spring | Low | 4 | 0 | 11.2 | 3.9 | 5.0 | 13.0 | 15.0 | |
| Spring | High | 5 | 0 | 10.0 | 4.8 | 1.0 | 10.5 | 15.0 | ||
| Summer | Low | 5 | 0 | 22.0 | 6.1 | 14.0 | 21.0 | 31.0 | ||
| Summer | High | 6 | 0 | 48.2 | 15.0 | 28.0 | 51.5 | 69.0 | ||
| Autumn | Low | 4 | 0 | 18.2 | 3.1 | 14.0 | 19.0 | 21.0 | ||
| Autumn | High | 5 | 0 | 19.7 | 11.9 | 4.0 | 17.5 | 36.0 | ||
| Winter | Low | 2 | 0 | 2.7 | 4.6 | 0.0 | 0.0 | 8.0 | ||
| Winter | High | 5 | 0 | 5.7 | 3.3 | 1.0 | 5.0 | 10.0 |
4 Exploratory data analysis
Model formula: \[ \begin{align} y_i &\sim{} \mathcal{NB}(\lambda_i, \theta)\\ ln(\lambda_i) &= \beta_0 + \sum_{j=1}^nT_{[i],j}.\beta_j\\ \beta_0 &\sim{} \mathcal{N}(2.4, 1.5)\\ \beta_{[1,2,3]} &\sim{} \mathcal{N}(0, 1)\\ \end{align} \]
where \(\beta_{0}\) is the y-intercept (the mean of the first group), \(\beta_{[1,2,3]}\) are the vector of effects parameters (each contrasting a group mean against that of the first group) and \(T_{[i],j}\) is an \(i\) by \(j\) model matrix representing season, density and their interaction on mussel recruitment.
5 Exploratory data analysis
The exploratory data analyses that we performed in the frequentist instalment of this example are equally valid here. That is, boxplots and/or violin plots for each season/density combination.
# A tibble: 6 × 6
SEASON DENSITY RECRUITS SQRTRECRUITS GROUP fSEASON
<chr> <fct> <dbl> <dbl> <chr> <fct>
1 Spring Low 15 3.87 SpringLow Spring
2 Spring Low 10 3.16 SpringLow Spring
3 Spring Low 13 3.61 SpringLow Spring
4 Spring Low 13 3.61 SpringLow Spring
5 Spring Low 5 2.24 SpringLow Spring
6 Spring High 11 3.32 SpringHigh Spring
Conclusions:
- there is clearly a relationship between mean and variance (as would be expected with a Poisson distribution)
- evidently there are numerous zeros in the Winter/Low group

# Fit the model
In rstanarm, the default priors are designed to be weakly informative. They are chosen to provide moderate regularisation (to help prevent over-fitting) and help stabilise the computations.
Priors for model 'quinn.rstanarmP'
------
Intercept (after predictors centered)
~ normal(location = 0, scale = 2.5)
Coefficients
Specified prior:
~ normal(location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
Adjusted prior:
~ normal(location = [0,0,0,...], scale = [5.47,5.80,6.02,...])
------
See help('prior_summary.stanreg') for more details
This tells us:
for the intercept, when the family is Poisson, it is using a normal prior with a mean of 0 and a standard deviation of 2.5. The 2.5 is used for all intercepts. It is often scaled, but only if it is larger than 2.5 is the scaled version kept.
for the coefficients (in this case, the individual effects), the default prior is a normal prior centred around 0 with standard deviations of 5.47, 5.80, 6.02 etc. This is then adjusted for the scale of the data by dividing the 2.5 by the standard deviation of the numerical dummy variables for the predictor (then rounded).
fSEASONSummer fSEASONAutumn fSEASONWinter
5.467708 5.799380 6.019749
DENSITYLow fSEASONSummer:DENSITYLow fSEASONAutumn:DENSITYLow
4.991312 7.058781 8.414625
fSEASONWinter:DENSITYLow
9.590995
- there is no auxiliary prior as we are employing a Poisson distribution.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
## Plot the (prior) predictions for each season/density combination via
## ggpredict, with the raw data overlaid (show_data = TRUE); the resulting
## list of panels is combined with wrap_plots().
quinn.rstanarm1 |>
  ggpredict(~ fSEASON + DENSITY) |>
  plot(show_data = TRUE) |>
  wrap_plots() &
  scale_y_log10()

Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## although, since there are zeros...
## ...a log10 scale produces infinite values, so replot with jittering of
## the raw data suppressed (jitter = FALSE); a pseudo-log y scale (which
## maps zeros to finite values) is applied on the following line.
quinn.rstanarm1 |>
  ggpredict(~ fSEASON + DENSITY) |>
  plot(show_data = TRUE, jitter = FALSE) |>
  wrap_plots() &
  scale_y_continuous(trans = scales::pseudo_log_trans())

Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
## Plot the (prior) estimated marginal means for each season/density
## combination via ggemmeans, with the raw data overlaid.
## Fix: the plot(show_data = TRUE) step was accidentally duplicated; it is
## now called exactly once.
quinn.rstanarm1 |>
  ggemmeans(~ fSEASON + DENSITY) |>
  plot(show_data = TRUE) |>
  wrap_plots() &
  scale_y_log10()

Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## although, since there are zeros...
## ...replot the marginal means without jittering of the raw data; a
## pseudo-log y scale (zeros map to finite values) is applied on the
## following line.
quinn.rstanarm1 |>
  ggemmeans(~ fSEASON + DENSITY) |>
  plot(show_data = TRUE, jitter = FALSE) |>
  wrap_plots() &
  scale_y_continuous(trans = scales::pseudo_log_trans())

Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Conclusions:
- we see that the range of predictions is fairly wide and the predicted means could range from 0 to very large (perhaps too large).
The following link provides some guidance about defining priors. [https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations]
When defining our own priors, we typically do not want them to be scaled.
If we wanted to define our own priors that were less vague, yet still not likely to bias the outcomes, we could try the following priors (mainly plucked out of thin air):
- \(\beta_0\): normal centred at 2.3 with a standard deviation of 5
- \(\beta_1\): normal centred at 0 with a standard deviation of 2
Remember the above are applied on the link scale.
I will also overlay the raw data for comparison.
## Log of the group means and standard deviations of RECRUITS -- a rough
## guide for centring priors on the log link scale.
quinn |>
  group_by(fSEASON, DENSITY) |>
  summarise(
    Mean = log(mean(RECRUITS)),
    SD = log(sd(RECRUITS))
  )

`summarise()` has grouped output by 'fSEASON'. You can override using the
`.groups` argument.
# A tibble: 8 × 4
# Groups: fSEASON [4]
fSEASON DENSITY Mean SD
<fct> <fct> <dbl> <dbl>
1 Spring High 2.30 1.57
2 Spring Low 2.42 1.36
3 Summer High 3.87 2.71
4 Summer Low 3.09 1.81
5 Autumn High 2.98 2.48
6 Autumn Low 2.90 1.13
7 Winter High 1.73 1.20
8 Winter Low 0.981 1.53
(Intercept) fSEASONSummer fSEASONAutumn
Inf 6.024998 6.390475
fSEASONWinter DENSITYLow fSEASONSummer:DENSITYLow
6.633304 5.500045 7.778238
fSEASONAutumn:DENSITYLow fSEASONWinter:DENSITYLow
9.272276 10.568545
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
## Prior predictive plot for the model with user-defined priors
## (quinn.rstanarm2): conditional predictions per season/density with the
## raw data overlaid, panels combined with wrap_plots().
quinn.rstanarm2 |>
  ggpredict(~ fSEASON + DENSITY) |>
  plot(show_data = TRUE) |>
  wrap_plots() &
  scale_y_log10()

Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## although, since there are zeros...
## ...replot without jittering; a pseudo-log y scale (zeros map to finite
## values) is applied on the following line.
quinn.rstanarm2 |>
  ggpredict(~ fSEASON + DENSITY) |>
  plot(show_data = TRUE, jitter = FALSE) |>
  wrap_plots() &
  scale_y_continuous(trans = scales::pseudo_log_trans())

Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
## Prior marginal means for the model with user-defined priors
## (quinn.rstanarm2), with the raw data overlaid.
## Fix: the plot(show_data = TRUE) step was accidentally duplicated; it is
## now called exactly once.
quinn.rstanarm2 |>
  ggemmeans(~ fSEASON + DENSITY) |>
  plot(show_data = TRUE) |>
  wrap_plots() &
  scale_y_log10()

Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## although, since there are zeros...
## ...replot the marginal means without jittering; a pseudo-log y scale is
## applied on the following line.
quinn.rstanarm2 |>
  ggemmeans(~ fSEASON + DENSITY) |>
  plot(show_data = TRUE, jitter = FALSE) |>
  wrap_plots() &
  scale_y_continuous(trans = scales::pseudo_log_trans())

Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Now lets refit, conditioning on the data.
In brms, the default priors are designed to be weakly informative. They are chosen to provide moderate regularisation (to help prevent over-fitting) and help stabilise the computations.
Unlike rstanarm, brms models must be compiled before they start sampling. For most models, the compilation of the stan code takes around 45 seconds.
prior class coef group resp dpar nlpar lb ub source
(flat) b default
(flat) b DENSITYLow (vectorized)
(flat) b fSEASONAutumn (vectorized)
(flat) b fSEASONAutumn:DENSITYLow (vectorized)
(flat) b fSEASONSummer (vectorized)
(flat) b fSEASONSummer:DENSITYLow (vectorized)
(flat) b fSEASONWinter (vectorized)
(flat) b fSEASONWinter:DENSITYLow (vectorized)
student_t(3, 2.6, 2.5) Intercept default
Remember that the priors are applied on the link (in this case, log) scale.
## Per-group location (mean/median) and spread (MAD/SD) statistics of
## RECRUITS, plus their logs, to inform priors on the log link scale.
quinn |>
  group_by(fSEASON, DENSITY) |>
  summarise(
    Mean = mean(RECRUITS),
    Median = median(RECRUITS),
    MAD = mad(RECRUITS),
    SD = sd(RECRUITS)
  ) |>
  mutate(
    ## NB: unnamed mutate() expressions produce backtick-quoted columns
    ## such as `log(Mean)` in the printed tibble below.
    log(Mean),
    log(Median),
    log(MAD),
    log(SD)
  )

`summarise()` has grouped output by 'fSEASON'. You can override using the
`.groups` argument.
# A tibble: 8 × 10
# Groups: fSEASON [4]
fSEASON DENSITY Mean Median MAD SD `log(Mean)` `log(Median)` `log(MAD)`
<fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Spring High 10 10.5 2.22 4.82 2.30 2.35 0.799
2 Spring Low 11.2 13 2.97 3.90 2.42 2.56 1.09
3 Summer High 48.2 51.5 15.6 15.0 3.87 3.94 2.75
4 Summer Low 22 21 6.67 6.13 3.09 3.04 1.90
5 Autumn High 19.7 17.5 12.6 11.9 2.98 2.86 2.53
6 Autumn Low 18.2 19 2.22 3.10 2.90 2.94 0.799
7 Winter High 5.67 5 3.71 3.33 1.73 1.61 1.31
8 Winter Low 2.67 0 0 4.62 0.981 -Inf -Inf
# ℹ 1 more variable: `log(SD)` <dbl>
- \(\beta_0\): normal centred at 2.4 with a standard deviation of 1.5
- \(\beta_1\): normal centred at 0 with a standard deviation of 1
## Weakly informative priors on the link (log) scale:
##  - Intercept: normal(2.4, 1.5)
##  - effects (class "b"): normal(0, 1)
priors <- prior(normal(2.4, 1.5), class = "Intercept") +
  prior(normal(0, 1), class = "b")
## Sample from the priors only (the likelihood is ignored) so the prior
## predictive distribution can be inspected before conditioning on data.
quinn.brm2 <- brm(quinn.form,
  data = quinn,
  prior = priors,
  sample_prior = "only", # draw from the priors alone
  refresh = 0, # suppress per-iteration sampling progress
  chains = 3,
  iter = 5000,
  thin = 5,
  warmup = 2500,
  backend = "rstan"
)

Compiling Stan program...
Start sampling
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## Or since there are zeros
## ...plot the brms prior predictions with the raw data overlaid; a
## pseudo-log y scale (zeros map to finite values) is applied on the
## following line.
quinn.brm2 |>
  ggpredict(~ fSEASON + DENSITY) |>
  plot(show_data = TRUE) +
  scale_y_continuous(trans = scales::pseudo_log_trans())

Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## Or since there are zeros
## ...plot the brms prior marginal means with the raw data overlaid; a
## pseudo-log y scale is applied on the following line.
quinn.brm2 |>
  ggemmeans(~ fSEASON + DENSITY) |>
  plot(show_data = TRUE) +
  scale_y_continuous(trans = scales::pseudo_log_trans())

Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
[1] "b_Intercept" "b_fSEASONSummer"
[3] "b_fSEASONAutumn" "b_fSEASONWinter"
[5] "b_DENSITYLow" "b_fSEASONSummer:DENSITYLow"
[7] "b_fSEASONAutumn:DENSITYLow" "b_fSEASONWinter:DENSITYLow"
[9] "Intercept" "prior_Intercept"
[11] "prior_b" "lprior"
[13] "lp__" "accept_stat__"
[15] "stepsize__" "treedepth__"
[17] "n_leapfrog__" "divergent__"
[19] "energy__"
## Compare the marginal prior and posterior draws for each parameter.
## Fix: posterior_samples() is deprecated (see ?as_draws); use
## as_draws_df() instead and drop the draw-index metadata columns
## (.chain, .iteration, .draw) that it adds.
quinn.brmP |>
  as_draws_df() |>
  as_tibble() |>
  dplyr::select(-`lp__`, -dplyr::starts_with(".")) |>
  pivot_longer(everything(), names_to = "key") |>
  mutate(
    Type = ifelse(str_detect(key, "prior"), "Prior", "b"),
    Class = ifelse(str_detect(key, "Intercept"), "Intercept",
      ifelse(str_detect(key, "b"), "b", "sigma")
    ),
    Par = str_replace(key, "b_", "")
  ) |>
  ggplot(aes(x = Type, y = value, color = Par)) +
  stat_pointinterval(position = position_dodge()) +
  facet_wrap(~Class, scales = "free")

Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
recommended alternatives.
$N
[1] 42
$Y
[1] 15 10 13 13 5 11 10 15 10 13 1 21 31 21 18 14 27 34 49 69 55 28 54 14 18
[26] 20 21 4 22 30 36 13 13 8 0 0 10 1 5 9 4 5
$K
[1] 8
$Kc
[1] 7
$X
Intercept fSEASONSummer fSEASONAutumn fSEASONWinter DENSITYLow
1 1 0 0 0 1
2 1 0 0 0 1
3 1 0 0 0 1
4 1 0 0 0 1
5 1 0 0 0 1
6 1 0 0 0 0
7 1 0 0 0 0
8 1 0 0 0 0
9 1 0 0 0 0
10 1 0 0 0 0
11 1 0 0 0 0
12 1 1 0 0 1
13 1 1 0 0 1
14 1 1 0 0 1
15 1 1 0 0 1
16 1 1 0 0 1
17 1 1 0 0 1
18 1 1 0 0 0
19 1 1 0 0 0
20 1 1 0 0 0
21 1 1 0 0 0
22 1 1 0 0 0
23 1 1 0 0 0
24 1 0 1 0 1
25 1 0 1 0 1
26 1 0 1 0 1
27 1 0 1 0 1
28 1 0 1 0 0
29 1 0 1 0 0
30 1 0 1 0 0
31 1 0 1 0 0
32 1 0 1 0 0
33 1 0 1 0 0
34 1 0 0 1 1
35 1 0 0 1 1
36 1 0 0 1 1
37 1 0 0 1 0
38 1 0 0 1 0
39 1 0 0 1 0
40 1 0 0 1 0
41 1 0 0 1 0
42 1 0 0 1 0
fSEASONSummer:DENSITYLow fSEASONAutumn:DENSITYLow fSEASONWinter:DENSITYLow
1 0 0 0
2 0 0 0
3 0 0 0
4 0 0 0
5 0 0 0
6 0 0 0
7 0 0 0
8 0 0 0
9 0 0 0
10 0 0 0
11 0 0 0
12 1 0 0
13 1 0 0
14 1 0 0
15 1 0 0
16 1 0 0
17 1 0 0
18 0 0 0
19 0 0 0
20 0 0 0
21 0 0 0
22 0 0 0
23 0 0 0
24 0 1 0
25 0 1 0
26 0 1 0
27 0 1 0
28 0 0 0
29 0 0 0
30 0 0 0
31 0 0 0
32 0 0 0
33 0 0 0
34 0 0 1
35 0 0 1
36 0 0 1
37 0 0 0
38 0 0 0
39 0 0 0
40 0 0 0
41 0 0 0
42 0 0 0
attr(,"assign")
[1] 0 1 1 1 2 3 3 3
attr(,"contrasts")
attr(,"contrasts")$fSEASON
Summer Autumn Winter
Spring 0 0 0
Summer 1 0 0
Autumn 0 1 0
Winter 0 0 1
attr(,"contrasts")$DENSITY
Low
High 0
Low 1
$prior_only
[1] 0
attr(,"class")
[1] "standata" "list"
// generated with brms 2.22.0
// Poisson GLM (log link) with centred design matrix; the user-specified
// priors normal(2.4, 1.5) on the intercept and normal(0, 1) on the
// effects appear in the transformed parameters block below.
functions {
}
data {
  int<lower=1> N; // total number of observations
  array[N] int Y; // response variable
  int<lower=1> K; // number of population-level effects
  matrix[N, K] X; // population-level design matrix
  int<lower=1> Kc; // number of population-level effects after centering
  int prior_only; // should the likelihood be ignored?
}
transformed data {
  matrix[N, Kc] Xc; // centered version of X without an intercept
  vector[Kc] means_X; // column means of X before centering
  // column 1 of X is the all-ones intercept, hence the loop starts at 2
  for (i in 2:K) {
    means_X[i - 1] = mean(X[, i]);
    Xc[, i - 1] = X[, i] - means_X[i - 1];
  }
}
parameters {
  vector[Kc] b; // regression coefficients
  real Intercept; // temporary intercept for centered predictors
}
transformed parameters {
  real lprior = 0; // prior contributions to the log posterior
  lprior += normal_lpdf(b | 0, 1);
  lprior += normal_lpdf(Intercept | 2.4, 1.5);
}
model {
  // likelihood including constants
  // skipped when prior_only == 1 (sample_prior = "only")
  if (!prior_only) {
    target += poisson_log_glm_lpmf(Y | Xc, Intercept, b);
  }
  // priors including constants
  target += lprior;
}
generated quantities {
  // actual population-level intercept (back-transformed from centering)
  real b_Intercept = Intercept - dot_product(means_X, b);
  // additionally sample draws from priors
  real prior_b = normal_rng(0,1);
  real prior_Intercept = normal_rng(2.4,1.5);
}
6 MCMC sampling diagnostics
The bayesplot package offers a range of MCMC diagnostics as well as Posterior Probability Checks (PPC), all of which have a convenient plot() interface. Lets start with the MCMC diagnostics.
bayesplot MCMC module:
mcmc_acf
mcmc_acf_bar
mcmc_areas
mcmc_areas_ridges
mcmc_combo
mcmc_dens
mcmc_dens_chains
mcmc_dens_overlay
mcmc_hex
mcmc_hist
mcmc_hist_by_chain
mcmc_intervals
mcmc_neff
mcmc_neff_hist
mcmc_nuts_acceptance
mcmc_nuts_divergence
mcmc_nuts_energy
mcmc_nuts_stepsize
mcmc_nuts_treedepth
mcmc_pairs
mcmc_parcoord
mcmc_rank_ecdf
mcmc_rank_hist
mcmc_rank_overlay
mcmc_recover_hist
mcmc_recover_intervals
mcmc_recover_scatter
mcmc_rhat
mcmc_rhat_hist
mcmc_scatter
mcmc_trace
mcmc_trace_highlight
mcmc_violin
Of these, we will focus on:
- mcmc_trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different shade of blue, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
The chains appear well mixed and very similar
- acf (auto-correlation function): plots the auto-correlation between successive MCMC sample lags for each parameter and each chain
There is no evidence of auto-correlation in the MCMC samples
- Rhat: Rhat is a measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
All Rhat values are below 1.05, suggesting the chains have converged.
neff (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).
If the ratios are low, tightening the priors may help.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Ratios all very high.
The rstan package offers a range of MCMC diagnostics. Lets start with the MCMC diagnostics.
Of these, we will focus on:
- stan_trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
The chains appear well mixed and very similar
- stan_acf (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
There is no evidence of auto-correlation in the MCMC samples
- stan_rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
All Rhat values are below 1.05, suggesting the chains have converged.
stan_ess (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).
If the ratios are low, tightening the priors may help.
Ratios all very high.
The ggmcmc package also has a set of MCMC diagnostic functions. Let's start with the MCMC diagnostics.
Of these, we will focus on:
- ggs_traceplot: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
The chains appear well mixed and very similar
- ggs_autocorrelation (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
There is no evidence of auto-correlation in the MCMC samples
- stan_rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
All Rhat values are below 1.05, suggesting the chains have converged.
stan_ess (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).
If the ratios are low, tightening the priors may help.
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
always returns an ungrouped data frame and adjust accordingly.
ℹ The deprecated feature was likely used in the ggmcmc package.
Please report the issue at <https://github.com/xfim/ggmcmc/issues/>.
Ratios all very high.
The brms package offers a range of MCMC diagnostics. Lets start with the MCMC diagnostics.
Of these, we will focus on:
- stan_trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
'pars' not specified. Showing first 10 parameters by default.
'pars' not specified. Showing first 10 parameters by default.
The chains appear well mixed and very similar
- stan_acf (auto-correlation function): plots the auto-correlation between successive MCMC sample lags for each parameter and each chain
There is no evidence of auto-correlation in the MCMC samples
- stan_rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
All Rhat values are below 1.05, suggesting the chains have converged.
stan_ess (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).
If the ratios are low, tightening the priors may help.
Ratios all very high.
7 Model validation
Posterior predictive checks provide additional diagnostics about the fit of the model. Specifically, they provide a comparison between predictions drawn from the model and the observed data used to train the model.
bayesplot PPC module:
ppc_bars
ppc_bars_grouped
ppc_boxplot
ppc_dens
ppc_dens_overlay
ppc_dens_overlay_grouped
ppc_ecdf_overlay
ppc_ecdf_overlay_grouped
ppc_error_binned
ppc_error_hist
ppc_error_hist_grouped
ppc_error_scatter
ppc_error_scatter_avg
ppc_error_scatter_avg_grouped
ppc_error_scatter_avg_vs_x
ppc_freqpoly
ppc_freqpoly_grouped
ppc_hist
ppc_intervals
ppc_intervals_grouped
ppc_km_overlay
ppc_km_overlay_grouped
ppc_loo_intervals
ppc_loo_pit_ecdf
ppc_loo_pit_overlay
ppc_loo_pit_qq
ppc_loo_ribbon
ppc_pit_ecdf
ppc_pit_ecdf_grouped
ppc_ribbon
ppc_ribbon_grouped
ppc_rootogram
ppc_scatter
ppc_scatter_avg
ppc_scatter_avg_grouped
ppc_stat
ppc_stat_2d
ppc_stat_freqpoly
ppc_stat_freqpoly_grouped
ppc_stat_grouped
ppc_violin_grouped
- dens_overlay: plots the density distribution of the observed data (black line) overlayed on top of 50 density distributions generated from draws from the model (light blue). Ideally, the 50 realisations should be roughly consistent with the observed data.
The model draws are broadly similar to the observed data.
- error_scatter_avg: this plots the observed values against the average residuals. Similar to a residual plot, we do not want to see any patterns in this plot. There is some pattern remaining in these residuals.
The predictive error seems to be related to the predictor - the model performs poorest at higher recruitments
- error_scatter_avg_vs_x: this is similar to a regular residual plot and as such should be interpreted as such.
- intervals: plots the observed data overlayed on top of posterior predictions associated with each level of the predictor. Ideally, the observed data should all fall within the predictive intervals.
The modelled predictions seem to do a reasonable job of representing the observations.
The shinystan package allows the full suite of MCMC diagnostics and posterior predictive checks to be accessed via a web interface.
DHARMa residuals provide very useful diagnostics. Unfortunately, we cannot directly use the simulateResiduals() function to generate the simulated residuals. However, if we are willing to calculate some of the components ourselves, we can still obtain the simulated residuals from the fitted stan model.
We need to supply:
- simulated (predicted) responses associated with each observation.
- observed values
- fitted (predicted) responses (averaged) associated with each observation
## Draw 250 posterior predictive simulations per observation.
## NOTE: `nsamples` and `summary` are not arguments of
## rstanarm::posterior_predict() and were silently absorbed by `...`;
## the number of simulations is controlled by `draws`.
preds <- posterior_predict(quinn.rstanarm3, draws = 250)
## Build DHARMa simulated residuals: DHARMa expects observations in
## rows and simulations in columns, hence the transpose.
quinn.resids <- createDHARMa(
  simulatedResponse = t(preds),
  observedResponse = quinn$RECRUITS,
  fittedPredictedResponse = apply(preds, 2, median),
  integerResponse = TRUE # the response is a count
)
plot(quinn.resids)
Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L,
: Fitting terminated with step failure - check results carefully
Conclusions:
- the simulated residuals suggest there might be an issue of dispersion.
- it might be worth exploring either zero-inflation, a negative binomial model, or including an observation-level random effect.
Posterior predictive checks provide additional diagnostics about the fit of the model. Specifically, they provide a comparison between predictions drawn from the model and the observed data used to train the model.
bayesplot PPC module:
ppc_bars
ppc_bars_grouped
ppc_boxplot
ppc_dens
ppc_dens_overlay
ppc_dens_overlay_grouped
ppc_ecdf_overlay
ppc_ecdf_overlay_grouped
ppc_error_binned
ppc_error_hist
ppc_error_hist_grouped
ppc_error_scatter
ppc_error_scatter_avg
ppc_error_scatter_avg_grouped
ppc_error_scatter_avg_vs_x
ppc_freqpoly
ppc_freqpoly_grouped
ppc_hist
ppc_intervals
ppc_intervals_grouped
ppc_km_overlay
ppc_km_overlay_grouped
ppc_loo_intervals
ppc_loo_pit_ecdf
ppc_loo_pit_overlay
ppc_loo_pit_qq
ppc_loo_ribbon
ppc_pit_ecdf
ppc_pit_ecdf_grouped
ppc_ribbon
ppc_ribbon_grouped
ppc_rootogram
ppc_scatter
ppc_scatter_avg
ppc_scatter_avg_grouped
ppc_stat
ppc_stat_2d
ppc_stat_freqpoly
ppc_stat_freqpoly_grouped
ppc_stat_grouped
ppc_violin_grouped
- dens_overlay: plots the density distribution of the observed data (black line) overlayed on top of 50 density distributions generated from draws from the model (light blue). Ideally, the 50 realisations should be roughly consistent with the observed data.
The model draws appear to represent the shape of the observed data reasonably well
- error_scatter_avg: this plots the observed values against the average residuals. Similar to a residual plot, we do not want to see any patterns in this plot. There is some pattern remaining in these residuals.
Using all posterior draws for ppc type 'error_scatter_avg' by default.
The predictive error seems to be related to the predictor - the model performs poorest at higher recruitments.
- intervals: plots the observed data overlayed on top of posterior predictions associated with each level of the predictor. Ideally, the observed data should all fall within the predictive intervals.
Using all posterior draws for ppc type 'intervals' by default.
Whilst the modelled predictions do a reasonable job of representing the observed data, the observed data do appear to be more varied than the model is representing.
The shinystan package allows the full suite of MCMC diagnostics and posterior predictive checks to be accessed via a web interface.
DHARMa residuals provide very useful diagnostics. Unfortunately, we cannot directly use the simulateResiduals() function to generate the simulated residuals. However, if we are willing to calculate some of the components ourselves, we can still obtain the simulated residuals from the fitted stan model.
We need to supply:
- simulated (predicted) responses associated with each observation.
- observed values
- fitted (predicted) responses (averaged) associated with each observation
Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
instead.
## Assemble the DHARMa simulated-residuals object from the posterior
## predictive draws (observations in rows, simulations in columns),
## then draw the standard DHARMa diagnostic panels.
quinn.resids <- createDHARMa(
  observedResponse = quinn$RECRUITS,
  simulatedResponse = t(preds),
  fittedPredictedResponse = apply(preds, 2, median),
  integerResponse = TRUE
)
plot(quinn.resids)
DHARMa nonparametric dispersion test via sd of residuals fitted vs.
simulated
data: simulationOutput
dispersion = 2.6375, p-value < 2.2e-16
alternative hypothesis: two.sided
DHARMa zero-inflation test via comparison to expected zeros with
simulation under H0 = fitted model
data: simulationOutput
ratioObsSim = 7.5758, p-value = 0.072
alternative hypothesis: two.sided
## DHARMa residuals for the brms Poisson model (via helper from
## helperFunctions.R), with diagnostic panels assembled by patchwork.
quinn.resids <- make_brms_dharma_res(quinn.brmP, integerResponse = TRUE)
wrap_elements(~ testUniformity(quinn.resids)) +
wrap_elements(~ plotResiduals(quinn.resids, form = factor(rep(1, nrow(quinn))))) +
wrap_elements(~ plotResiduals(quinn.resids, quantreg = TRUE)) +
wrap_elements(~ testDispersion(quinn.resids))
Conclusions:
- the simulated residuals do suggest that there might be a dispersion issue
- it might be worth exploring either zero-inflation, a negative binomial model, or including an observation-level random effect.
8 Explore negative binomial model
## Negative binomial (log link) alternative to the Poisson model,
## motivated by the overdispersion seen in the DHARMa residuals.
quinn.rstanarmNB <- stan_glm(RECRUITS ~ fSEASON * DENSITY,
data = quinn,
family = neg_binomial_2(link = "log"),
prior_intercept = normal(2.3, 2, autoscale = FALSE), # weakly informative, log scale
prior = normal(0, 10, autoscale = FALSE),
prior_aux = rstanarm::exponential(rate = 1, autoscale = FALSE), # reciprocal dispersion
prior_PD = FALSE, # sample the posterior, not just the prior
refresh = 0,
chains = 3, iter = 5000, thin = 5, warmup = 2000
)
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
There seems to be a bug here. The expected values should be being back transformed to the response scale, however, they are clearly not. Notice that the expected values (and associated CIs) are low and tiny, respectively.
## Posterior predictive simulations from the negative binomial model.
## NOTE: `nsamples` and `summary` are not arguments of
## rstanarm::posterior_predict(); use `draws` to limit the simulations.
preds <- posterior_predict(quinn.rstanarmNB, draws = 250)
## DHARMa expects observations in rows and simulations in columns.
quinn.resids <- createDHARMa(
  simulatedResponse = t(preds),
  observedResponse = quinn$RECRUITS,
  fittedPredictedResponse = apply(preds, 2, median),
  integerResponse = TRUE # counts
)
plot(quinn.resids)
DHARMa nonparametric dispersion test via sd of residuals fitted vs.
simulated
data: simulationOutput
dispersion = 0.25556, p-value = 0.02556
alternative hypothesis: two.sided
Now possibly under-dispersed.
Warning: Found 1 observation(s) with a pareto_k > 0.7. We recommend calling 'loo' again with argument 'k_threshold = 0.7' in order to calculate the ELPD without the assumption that these observations are negligible. This will refit the model 1 times to compute the ELPDs for the problematic observations directly.
Computed from 1800 by 42 log-likelihood matrix.
Estimate SE
elpd_loo -170.1 15.4
p_loo 23.3 5.2
looic 340.3 30.8
------
MCSE of elpd_loo is NA.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.7, 1.1]).
Pareto k diagnostic values:
Count Pct. Min. ESS
(-Inf, 0.69] (good) 41 97.6% 129
(0.69, 1] (bad) 0 0.0% <NA>
(1, Inf) (very bad) 1 2.4% <NA>
See help('pareto-k-diagnostic') for details.
Warning: Found 1 observation(s) with a pareto_k > 0.7. We recommend calling 'loo' again with argument 'k_threshold = 0.7' in order to calculate the ELPD without the assumption that these observations are negligible. This will refit the model 1 times to compute the ELPDs for the problematic observations directly.
Computed from 1800 by 42 log-likelihood matrix.
Estimate SE
elpd_loo -150.9 6.1
p_loo 9.6 3.7
looic 301.8 12.2
------
MCSE of elpd_loo is NA.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.8, 1.0]).
Pareto k diagnostic values:
Count Pct. Min. ESS
(-Inf, 0.69] (good) 41 97.6% 597
(0.69, 1] (bad) 0 0.0% <NA>
(1, Inf) (very bad) 1 2.4% <NA>
See help('pareto-k-diagnostic') for details.
elpd_diff se_diff
quinn.rstanarmNB 0.0 0.0
quinn.rstanarmP -19.2 11.8
quinn.form <- bf(RECRUITS ~ fSEASON * DENSITY, family = negbinomial(link = "log"))
get_prior(quinn.form, data = quinn)
prior class coef group resp dpar
(flat) b
(flat) b DENSITYLow
(flat) b fSEASONAutumn
(flat) b fSEASONAutumn:DENSITYLow
(flat) b fSEASONSummer
(flat) b fSEASONSummer:DENSITYLow
(flat) b fSEASONWinter
(flat) b fSEASONWinter:DENSITYLow
student_t(3, 2.6, 2.5) Intercept
inv_gamma(0.4, 0.3) shape
nlpar lb ub source
default
(vectorized)
(vectorized)
(vectorized)
(vectorized)
(vectorized)
(vectorized)
(vectorized)
default
0 default
## Weakly informative priors on the intercept, the population-level
## effects and the NB shape (dispersion) parameter.
priors <- prior(normal(2.4, 1.5), class = "Intercept") +
prior(normal(0, 1), class = "b") +
prior(gamma(0.01, 0.01), class = "shape")
## Fit the negative binomial model with brms.
quinn.brmsNB <- brm(quinn.form,
data = quinn,
prior = priors,
refresh = 0,
chains = 3,
iter = 5000,
thin = 5,
warmup = 2500,
backend = "rstan"
)
Compiling Stan program...
Start sampling
Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
instead.
## Rebuild the DHARMa residuals from the NB model's predictive draws.
quinn.resids <- createDHARMa(
simulatedResponse = t(preds),
observedResponse = quinn$RECRUITS,
fittedPredictedResponse = apply(preds, 2, median),
integerResponse = TRUE
)
plot(quinn.resids)
quinn.resids <- make_brms_dharma_res(quinn.brmsNB, integerResponse = TRUE)
## DHARMa diagnostic panels for the brms NB model, via patchwork.
wrap_elements(~ testUniformity(quinn.resids)) +
## wrap_elements(~plotResiduals(quinn.resids, form = factor(rep(1, nrow(quinn))))) +
wrap_elements(~ plotResiduals(quinn.resids, quantreg = TRUE)) +
wrap_elements(~ testDispersion(quinn.resids))
9 Partial effects plots
## Partial-effect (conditional mean) plot for the rstanarm NB model.
## fitted_draws() is deprecated; epred_draws() returns the expectation
## of the posterior predictive in a column named `.epred`.
quinn.rstanarmNB |>
  epred_draws(newdata = quinn) |>
  median_hdci() |>
  ggplot(aes(x = fSEASON, colour = DENSITY, y = .epred)) +
  geom_pointrange(aes(ymin = .lower, ymax = .upper), position = position_dodge(width = 0.2)) +
  geom_line(position = position_dodge(width = 0.2)) +
geom_point(data = quinn, aes(y = RECRUITS, x = fSEASON, colour = DENSITY), position = position_dodge(width = 0.2))
Warning: `fitted_draws` and `add_fitted_draws` are deprecated as their names were confusing.
- Use [add_]epred_draws() to get the expectation of the posterior predictive.
- Use [add_]linpred_draws() to get the distribution of the linear predictor.
- For example, you used [add_]fitted_draws(..., scale = "response"), which
means you most likely want [add_]epred_draws(...).
NOTE: When updating to the new functions, note that the `model` parameter is now
named `object` and the `n` parameter is now named `ndraws`.
## Partial-effect plot for the brms NB model. fitted_draws() is
## deprecated; epred_draws() returns the posterior expectation in `.epred`.
quinn.brmsNB |>
  epred_draws(newdata = quinn) |>
  median_hdci() |>
  ggplot(aes(x = fSEASON, colour = DENSITY, y = .epred)) +
  geom_pointrange(aes(ymin = .lower, ymax = .upper), position = position_dodge(width = 0.2)) +
  geom_line(position = position_dodge(width = 0.2)) +
geom_point(data = quinn, aes(y = RECRUITS, x = fSEASON, colour = DENSITY), position = position_dodge(width = 0.2))
Warning: `fitted_draws` and `add_fitted_draws` are deprecated as their names were confusing.
- Use [add_]epred_draws() to get the expectation of the posterior predictive.
- Use [add_]linpred_draws() to get the distribution of the linear predictor.
- For example, you used [add_]fitted_draws(..., scale = "response"), which
means you most likely want [add_]epred_draws(...).
NOTE: When updating to the new functions, note that the `model` parameter is now
named `object` and the `n` parameter is now named `ndraws`.
10 Model investigation
The summary() method generates simple summaries (mean, standard deviation as well as 10, 50 and 90 percentiles).
Model Info:
function: stan_glm
family: neg_binomial_2 [log]
formula: RECRUITS ~ fSEASON * DENSITY
algorithm: sampling
sample: 1800 (posterior sample size)
priors: see help('prior_summary')
observations: 42
predictors: 8
Estimates:
mean sd 10% 50% 90%
(Intercept) 2.3 0.3 2.0 2.3 2.6
fSEASONSummer 1.6 0.3 1.2 1.6 2.0
fSEASONAutumn 0.7 0.3 0.3 0.7 1.1
fSEASONWinter -0.6 0.4 -1.1 -0.6 -0.1
DENSITYLow 0.1 0.4 -0.4 0.1 0.6
fSEASONSummer:DENSITYLow -0.9 0.5 -1.6 -0.9 -0.3
fSEASONAutumn:DENSITYLow -0.2 0.5 -0.9 -0.2 0.5
fSEASONWinter:DENSITYLow -0.9 0.7 -1.8 -0.9 0.0
reciprocal_dispersion 3.9 1.2 2.5 3.8 5.7
Fit Diagnostics:
mean sd 10% 50% 90%
mean_PPD 19.5 3.2 15.8 19.1 23.6
The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
MCMC diagnostics
mcse Rhat n_eff
(Intercept) 0.0 1.0 1708
fSEASONSummer 0.0 1.0 1745
fSEASONAutumn 0.0 1.0 1697
fSEASONWinter 0.0 1.0 1664
DENSITYLow 0.0 1.0 1609
fSEASONSummer:DENSITYLow 0.0 1.0 1613
fSEASONAutumn:DENSITYLow 0.0 1.0 1659
fSEASONWinter:DENSITYLow 0.0 1.0 1649
reciprocal_dispersion 0.0 1.0 1664
mean_PPD 0.1 1.0 1593
log-posterior 0.1 1.0 1521
For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
Conclusions:
- the intercept represents the estimated mean of the first combination of Season (Spring) and Density (High). On the link scale this is 2.32
- the difference between Low and High adult density in spring is 0.12, although this is not significant
- the difference between Spring and Summer for High adult density is 1.58
- the difference between Spring and Autumn for High adult density is 0.68
- the difference between Spring and Winter for High adult density is -0.58
- if there were no interactions, we would expect the Low density Summer recruitment to be the additive of the main effects (Low and Summer). However, the modelled mean is 0.91 less than the additive effects would have expected. This value is significantly different to 0, indicating that there is evidence that the density effect in Summer is different to that in Spring.
- the density effect in Autumn and Winter were not found to be significantly different from what you would expect from an additive model.
tidyMCMC(quinn.rstanarmNB$stanfit, estimate.method = "median", conf.int = TRUE, conf.method = "HPDinterval", rhat = TRUE, ess = TRUE)
# A tibble: 11 × 7
term estimate std.error conf.low conf.high rhat ess
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 (Intercept) 2.32 0.256 1.84 2.82 1.000 1708
2 fSEASONSummer 1.58 0.339 0.903 2.21 0.999 1745
3 fSEASONAutumn 0.680 0.344 0.00313 1.31 1.000 1697
4 fSEASONWinter -0.584 0.390 -1.35 0.175 1.00 1664
5 DENSITYLow 0.125 0.385 -0.623 0.863 1.00 1609
6 fSEASONSummer:DENSITYLow -0.914 0.499 -1.84 0.121 0.999 1613
7 fSEASONAutumn:DENSITYLow -0.184 0.538 -1.25 0.863 1.000 1659
8 fSEASONWinter:DENSITYLow -0.884 0.687 -2.19 0.490 1.00 1649
9 reciprocal_dispersion 3.94 1.25 1.78 6.42 1.00 1664
10 mean_PPD 19.5 3.17 13.5 25.5 1.000 1593
11 log-posterior -155. 2.43 -160. -151. 1.00 1521
Conclusions:
See above
## Posterior summaries (median, 95% HDI) plus convergence diagnostics
## computed directly from the stanfit draws.
quinn.rstanarmNB$stanfit |>
summarise_draws(
median,
HDInterval::hdi,
rhat, length, ess_bulk, ess_tail
)# A tibble: 11 × 8
variable median lower upper rhat length ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 2.31 1.84e+0 2.82 1.000 1800 1718. 1713.
2 fSEASONSummer 1.59 9.03e-1 2.21 1.00 1800 1748. 1552.
3 fSEASONAutumn 0.672 3.13e-3 1.31 1.00 1800 1701. 1695.
4 fSEASONWinter -0.592 -1.35e+0 0.175 1.00 1800 1678. 1637.
5 DENSITYLow 0.136 -6.23e-1 0.863 1.00 1800 1616. 1643.
6 fSEASONSummer:DENS… -0.917 -1.84e+0 0.121 1.00 1800 1615. 1707.
7 fSEASONAutumn:DENS… -0.180 -1.25e+0 0.863 1.000 1800 1663. 1661.
8 fSEASONWinter:DENS… -0.875 -2.19e+0 0.490 1.00 1800 1667. 1485.
9 reciprocal_dispers… 3.76 1.78e+0 6.42 1.00 1800 1486. 1598.
10 mean_PPD 19.1 1.35e+1 25.5 1.00 1800 1622. 1637.
11 log-posterior -155. -1.60e+2 -151. 1.00 1800 1543. 1526.
We can also alter the CI level.
## As above, but with a 90% highest-density credible interval.
quinn.rstanarmNB$stanfit |>
summarise_draws(
median,
~ HDInterval::hdi(.x, credMass = 0.9),
rhat, length, ess_bulk, ess_tail
)# A tibble: 11 × 8
variable median lower upper rhat length ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 2.31 1.91 2.74e+0 1.000 1800 1718. 1713.
2 fSEASONSummer 1.59 1.01 2.10e+0 1.00 1800 1748. 1552.
3 fSEASONAutumn 0.672 0.161 1.27e+0 1.00 1800 1701. 1695.
4 fSEASONWinter -0.592 -1.16 9.88e-2 1.00 1800 1678. 1637.
5 DENSITYLow 0.136 -0.549 6.98e-1 1.00 1800 1616. 1643.
6 fSEASONSummer:DENS… -0.917 -1.75 -1.39e-1 1.00 1800 1615. 1707.
7 fSEASONAutumn:DENS… -0.180 -0.999 7.69e-1 1.000 1800 1663. 1661.
8 fSEASONWinter:DENS… -0.875 -2.06 1.92e-1 1.00 1800 1667. 1485.
9 reciprocal_dispers… 3.76 2.09 5.94e+0 1.00 1800 1486. 1598.
10 mean_PPD 19.1 14.8 2.46e+1 1.00 1800 1622. 1637.
11 log-posterior -155. -159. -1.52e+2 1.00 1800 1543. 1526.
Arguably, it would be better to back-transform to the ratio scale
## Back-transform (exp) the draws so effects are on the response
## (ratio) scale before summarising. NOTE(review): exponentiating
## mean_PPD and the log-posterior is not meaningful - interpret only
## the regression coefficients from this output.
quinn.rstanarmNB$stanfit |>
summarise_draws(
~ median(exp(.x)),
~ HDInterval::hdi(exp(.x)),
rhat, length, ess_bulk, ess_tail
)# A tibble: 11 × 8
variable `~median(exp(.x))` lower upper rhat length ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Interce… 1.01e+ 1 5.77e+ 0 1.60e+ 1 1.000 1800 1718. 1713.
2 fSEASONS… 4.89e+ 0 2.33e+ 0 8.91e+ 0 1.00 1800 1748. 1552.
3 fSEASONA… 1.96e+ 0 9.49e- 1 3.57e+ 0 1.00 1800 1701. 1695.
4 fSEASONW… 5.53e- 1 2.20e- 1 1.08e+ 0 1.00 1800 1678. 1637.
5 DENSITYL… 1.15e+ 0 4.21e- 1 2.16e+ 0 1.00 1800 1616. 1643.
6 fSEASONS… 4.00e- 1 9.98e- 2 9.51e- 1 1.00 1800 1615. 1707.
7 fSEASONA… 8.35e- 1 2.00e- 1 2.05e+ 0 1.000 1800 1663. 1661.
8 fSEASONW… 4.17e- 1 4.16e- 2 1.24e+ 0 1.00 1800 1667. 1485.
9 reciproc… 4.31e+ 1 3.54e+ 0 4.94e+ 2 1.00 1800 1486. 1598.
10 mean_PPD 1.96e+ 8 1.75e+ 5 7.73e+10 1.00 1800 1622. 1637.
11 log-post… 5.42e-68 1.99e-73 9.25e-67 1.00 1800 1543. 1526.
# A draws_df: 600 iterations, 3 chains, and 11 variables
(Intercept) fSEASONSummer fSEASONAutumn fSEASONWinter DENSITYLow
1 2.5 1.4 0.35 -0.63 -0.00021
2 2.5 1.9 0.63 -1.01 -0.17478
3 2.2 1.4 0.86 -0.18 0.21751
4 2.0 1.4 1.13 -0.51 0.46422
5 2.2 1.8 0.89 -0.40 -0.18992
6 2.0 2.4 1.00 -0.63 0.45984
7 2.1 1.7 0.79 -0.53 0.36788
8 1.9 2.1 1.05 -0.10 0.54228
9 2.5 1.5 0.28 -1.02 -0.17071
10 2.3 1.1 0.76 -0.60 0.52501
fSEASONSummer:DENSITYLow fSEASONAutumn:DENSITYLow fSEASONWinter:DENSITYLow
1 -0.78 -0.423 -0.428
2 -0.96 -0.668 -0.042
3 -0.56 -0.137 -0.756
4 -1.12 -0.431 -1.580
5 -0.61 0.084 -1.060
6 -1.98 -0.147 -1.417
7 -0.73 -0.664 0.026
8 -1.44 -0.146 -0.745
9 -0.74 0.019 -0.947
10 -0.85 -1.005 -0.833
# ... with 1790 more draws, and 3 more variables
# ... hidden reserved variables {'.chain', '.iteration', '.draw'}
## The same summaries via the posterior package's draws_df pathway.
quinn.rstanarmNB$stanfit |>
as_draws_df() |>
summarise_draws(
median,
~ HDInterval::hdi(.x),
rhat,
ess_bulk
)# A tibble: 11 × 6
variable median lower upper rhat ess_bulk
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 2.31 1.84 2.82 1.000 1718.
2 fSEASONSummer 1.59 0.903 2.21 1.00 1748.
3 fSEASONAutumn 0.672 0.00313 1.31 1.00 1701.
4 fSEASONWinter -0.592 -1.35 0.175 1.00 1678.
5 DENSITYLow 0.136 -0.623 0.863 1.00 1616.
6 fSEASONSummer:DENSITYLow -0.917 -1.84 0.121 1.00 1615.
7 fSEASONAutumn:DENSITYLow -0.180 -1.25 0.863 1.000 1663.
8 fSEASONWinter:DENSITYLow -0.875 -2.19 0.490 1.00 1667.
9 reciprocal_dispersion 3.76 1.78 6.42 1.00 1486.
10 mean_PPD 19.1 13.5 25.5 1.00 1622.
11 log-posterior -155. -160. -151. 1.00 1543.
## Exponentiate all draws first, then summarise on the response scale
## (same caveat as above for mean_PPD and the log-posterior).
quinn.rstanarmNB$stanfit |>
as_draws_df() |>
exp() |>
summarise_draws(
median,
~ HDInterval::hdi(.x),
rhat,
ess_bulk
)# A tibble: 11 × 6
variable median lower upper rhat ess_bulk
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 1.01e+ 1 5.77e+ 0 1.60e+ 1 1.000 1718.
2 fSEASONSummer 4.89e+ 0 2.33e+ 0 8.91e+ 0 1.00 1748.
3 fSEASONAutumn 1.96e+ 0 9.49e- 1 3.57e+ 0 1.000 1701.
4 fSEASONWinter 5.53e- 1 2.20e- 1 1.08e+ 0 1.00 1678.
5 DENSITYLow 1.15e+ 0 4.21e- 1 2.16e+ 0 1.00 1616.
6 fSEASONSummer:DENSITYLow 4.00e- 1 9.98e- 2 9.51e- 1 1.00 1615.
7 fSEASONAutumn:DENSITYLow 8.35e- 1 2.00e- 1 2.05e+ 0 1.000 1663.
8 fSEASONWinter:DENSITYLow 4.17e- 1 4.16e- 2 1.24e+ 0 1.00 1667.
9 reciprocal_dispersion 4.31e+ 1 3.54e+ 0 4.94e+ 2 1.00 1486.
10 mean_PPD 1.96e+ 8 1.75e+ 5 7.73e+10 1.00 1622.
11 log-posterior 5.42e-68 1.99e-73 9.25e-67 1.00 1543.
Due to the presence of a log transform in the predictor, it is better to use the regex version.
[1] "(Intercept)" "fSEASONSummer"
[3] "fSEASONAutumn" "fSEASONWinter"
[5] "DENSITYLow" "fSEASONSummer:DENSITYLow"
[7] "fSEASONAutumn:DENSITYLow" "fSEASONWinter:DENSITYLow"
[9] "reciprocal_dispersion" "accept_stat__"
[11] "stepsize__" "treedepth__"
[13] "n_leapfrog__" "divergent__"
[15] "energy__"
quinn.draw <- quinn.rstanarmNB |> gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE)
quinn.draw# A tibble: 14,400 × 5
# Groups: .variable [8]
.chain .iteration .draw .variable .value
<int> <int> <int> <chr> <dbl>
1 1 1 1 (Intercept) 2.49
2 1 2 2 (Intercept) 2.50
3 1 3 3 (Intercept) 2.15
4 1 4 4 (Intercept) 2.03
5 1 5 5 (Intercept) 2.24
6 1 6 6 (Intercept) 2.03
7 1 7 7 (Intercept) 2.14
8 1 8 8 (Intercept) 1.90
9 1 9 9 (Intercept) 2.53
10 1 10 10 (Intercept) 2.32
# ℹ 14,390 more rows
exceedP <- function(x, Val = 0) mean(x > Val)
## Summarise each effect on the response (ratio) scale, including the
## exceedance probability P(effect ratio > 1).
quinn.rstanarmNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
mutate(.value = exp(.value)) |>
summarise_draws(
median,
HDInterval::hdi,
rhat,
length,
ess_bulk,
ess_tail,
~ exceedP(.x, 1)
)# A tibble: 8 × 10
# Groups: .variable [8]
.variable variable median lower upper rhat length ess_bulk ess_tail
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) .value 10.1 5.77 16.0 1.000 1800 1718. 1713.
2 DENSITYLow .value 1.15 0.421 2.16 1.00 1800 1616. 1643.
3 fSEASONAutumn .value 1.96 0.949 3.57 1.000 1800 1701. 1695.
4 fSEASONAutumn:DE… .value 0.835 0.200 2.05 1.000 1800 1663. 1661.
5 fSEASONSummer .value 4.89 2.33 8.91 1.00 1800 1748. 1552.
6 fSEASONSummer:DE… .value 0.400 0.0998 0.951 1.00 1800 1615. 1707.
7 fSEASONWinter .value 0.553 0.220 1.08 1.00 1800 1678. 1637.
8 fSEASONWinter:DE… .value 0.417 0.0416 1.24 1.00 1800 1667. 1485.
# ℹ 1 more variable: `~exceedP(.x, 1)` <dbl>
We can then summarise this
# A tibble: 8 × 7
.variable .value .lower .upper .width .point .interval
<chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 (Intercept) 2.31 1.85 2.84 0.95 median hdci
2 DENSITYLow 0.136 -0.623 0.863 0.95 median hdci
3 fSEASONAutumn 0.672 0.00313 1.31 0.95 median hdci
4 fSEASONAutumn:DENSITYLow -0.180 -1.25 0.863 0.95 median hdci
5 fSEASONSummer 1.59 0.903 2.21 0.95 median hdci
6 fSEASONSummer:DENSITYLow -0.917 -1.84 0.121 0.95 median hdci
7 fSEASONWinter -0.592 -1.30 0.245 0.95 median hdci
8 fSEASONWinter:DENSITYLow -0.875 -2.18 0.503 0.95 median hdci
We could alternatively express the parameters on the response scale.
# A tibble: 8 × 7
.variable `exp(.value)` .lower .upper .width .point .interval
<chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 (Intercept) 10.1 6.06 16.3 0.95 median hdci
2 DENSITYLow 1.15 0.447 2.19 0.95 median hdci
3 fSEASONAutumn 1.96 0.949 3.57 0.95 median hdci
4 fSEASONAutumn:DENSITYLow 0.835 0.179 2.05 0.95 median hdci
5 fSEASONSummer 4.89 2.33 8.91 0.95 median hdci
6 fSEASONSummer:DENSITYLow 0.400 0.0998 0.951 0.95 median hdci
7 fSEASONWinter 0.553 0.220 1.08 0.95 median hdci
8 fSEASONWinter:DENSITYLow 0.417 0.0416 1.24 0.95 median hdci
## Half-eye (density + interval) plots of each parameter's posterior
## on the link (log) scale.
quinn.rstanarmNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
ggplot() +
stat_halfeye(aes(x = .value, y = .variable)) +
facet_wrap(~.variable, scales = "free")quinn.rstanarmNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
ggplot() +
geom_vline(xintercept = 0, linetype = "dashed") +
stat_halfeye(aes(x = .value, y = .variable)) +
theme_classic()We could alternatively express the parameters on the response scale.
quinn.rstanarmNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
group_by(.variable) |>
mutate(.value = exp(.value)) |>
median_hdci()# A tibble: 8 × 7
.variable .value .lower .upper .width .point .interval
<chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 (Intercept) 10.1 6.06 16.3 0.95 median hdci
2 DENSITYLow 1.15 0.447 2.19 0.95 median hdci
3 fSEASONAutumn 1.96 0.949 3.57 0.95 median hdci
4 fSEASONAutumn:DENSITYLow 0.835 0.179 2.05 0.95 median hdci
5 fSEASONSummer 4.89 2.33 8.91 0.95 median hdci
6 fSEASONSummer:DENSITYLow 0.400 0.0998 0.951 0.95 median hdci
7 fSEASONWinter 0.553 0.220 1.08 0.95 median hdci
8 fSEASONWinter:DENSITYLow 0.417 0.0416 1.24 0.95 median hdci
## Same posteriors back-transformed (exp) to the response scale; the dashed
## line at 1 marks "no effect", and the x axis uses a log2 transform so
## fold-changes are symmetric about 1.
quinn.rstanarmNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
mutate(.value = exp(.value)) |>
ggplot() +
geom_vline(xintercept = 1, linetype = "dashed") +
stat_halfeye(aes(x = .value, y = .variable)) +
scale_x_continuous("", trans = scales::log2_trans(), breaks = unique(as.vector(2^(0:4 %o% c(-1, 1))))) +
theme_classic()## Link scale
## Link-scale posterior slabs shaded by 50/80/95% CDF intervals.
## `stat()` was deprecated in ggplot2 3.4.0 (see the warning emitted below);
## `after_stat()` is the supported replacement.
quinn.rstanarmNB |>
  gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
  ggplot() +
  stat_slab(aes(
    x = .value, y = .variable,
    fill = after_stat(ggdist::cut_cdf_qi(cdf,
      .width = c(0.5, 0.8, 0.95),
      labels = scales::percent_format()
    ))
  ), color = "black") +
  geom_vline(xintercept = 0, linetype = "dashed") +
scale_fill_brewer("Interval", direction = -1, na.translate = FALSE)Warning: `stat(ggdist::cut_cdf_qi(cdf, .width = c(0.5, 0.8, 0.95), labels =
scales::percent_format()))` was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(ggdist::cut_cdf_qi(cdf, .width = c(0.5, 0.8, 0.95),
labels = scales::percent_format()))` instead.
## Fractional scale
## Fractional (response-scale) posterior slabs; the dashed line at 1 marks
## "no effect". `stat()` is deprecated since ggplot2 3.4.0 -> `after_stat()`.
quinn.rstanarmNB |>
  gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
  mutate(.value = exp(.value)) |>
  ggplot() +
  stat_slab(aes(
    x = .value, y = .variable,
    fill = after_stat(ggdist::cut_cdf_qi(cdf,
      .width = c(0.5, 0.8, 0.95),
      labels = scales::percent_format()
    ))
  ), color = "black") +
  geom_vline(xintercept = 1, linetype = "dashed") +
  scale_fill_brewer("Interval", direction = -1, na.translate = FALSE) +
scale_x_continuous(trans = scales::log2_trans())quinn.rstanarmNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
ggplot() +
stat_halfeye(aes(x = .value, y = .variable)) +
facet_wrap(~.variable, scales = "free")quinn.rstanarmNB |>
gather_draws(`.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
ggplot() +
stat_halfeye(aes(x = .value, y = .variable)) +
geom_vline(xintercept = 0, linetype = "dashed")quinn.rstanarmNB |>
gather_draws(`.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
ggplot() +
stat_halfeye(aes(x = exp(.value), y = .variable)) +
geom_vline(xintercept = 1, linetype = "dashed") +
scale_x_continuous(trans = scales::log2_trans())quinn.rstanarmNB |>
gather_draws(`.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
ggplot() +
geom_density_ridges(aes(x = .value, y = .variable), alpha = 0.4) +
geom_vline(xintercept = 0, linetype = "dashed")Picking joint bandwidth of 0.0886
## Or on a fractional scale
## Ridge densities of the back-transformed effects, filled by the computed
## x value, with 95% quantile lines. `stat(x)` is deprecated since
## ggplot2 3.4.0 -> `after_stat(x)`.
quinn.rstanarmNB |>
  gather_draws(`.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
  ggplot() +
  geom_density_ridges_gradient(
    aes(
      x = exp(.value),
      y = .variable,
      fill = after_stat(x)
    ),
    alpha = 0.4, colour = "white",
    quantile_lines = TRUE,
    quantiles = c(0.025, 0.975)
  ) +
  geom_vline(xintercept = 1, linetype = "dashed") +
  scale_x_continuous(trans = scales::log2_trans()) +
scale_fill_viridis_c(option = "C")Picking joint bandwidth of 0.128
This is purely a graphical depiction of the posteriors.
# A tibble: 1,800 × 18
.chain .iteration .draw `(Intercept)` fSEASONSummer fSEASONAutumn
<int> <int> <int> <dbl> <dbl> <dbl>
1 1 1 1 2.49 1.43 0.354
2 1 2 2 2.50 1.89 0.626
3 1 3 3 2.15 1.41 0.855
4 1 4 4 2.03 1.44 1.13
5 1 5 5 2.24 1.83 0.894
6 1 6 6 2.03 2.41 1.00
7 1 7 7 2.14 1.66 0.791
8 1 8 8 1.90 2.06 1.05
9 1 9 9 2.53 1.49 0.282
10 1 10 10 2.32 1.06 0.765
# ℹ 1,790 more rows
# ℹ 12 more variables: fSEASONWinter <dbl>, DENSITYLow <dbl>,
# `fSEASONSummer:DENSITYLow` <dbl>, `fSEASONAutumn:DENSITYLow` <dbl>,
# `fSEASONWinter:DENSITYLow` <dbl>, reciprocal_dispersion <dbl>,
# accept_stat__ <dbl>, stepsize__ <dbl>, treedepth__ <dbl>,
# n_leapfrog__ <dbl>, divergent__ <dbl>, energy__ <dbl>
# A tibble: 1,800 × 11
.chain .iteration .draw `(Intercept)` fSEASONSummer fSEASONAutumn
<int> <int> <int> <dbl> <dbl> <dbl>
1 1 1 1 2.49 1.43 0.354
2 1 2 2 2.50 1.89 0.626
3 1 3 3 2.15 1.41 0.855
4 1 4 4 2.03 1.44 1.13
5 1 5 5 2.24 1.83 0.894
6 1 6 6 2.03 2.41 1.00
7 1 7 7 2.14 1.66 0.791
8 1 8 8 1.90 2.06 1.05
9 1 9 9 2.53 1.49 0.282
10 1 10 10 2.32 1.06 0.765
# ℹ 1,790 more rows
# ℹ 5 more variables: fSEASONWinter <dbl>, DENSITYLow <dbl>,
# `fSEASONSummer:DENSITYLow` <dbl>, `fSEASONAutumn:DENSITYLow` <dbl>,
# `fSEASONWinter:DENSITYLow` <dbl>
Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
recommended alternatives.
# A tibble: 1,800 × 9
`(Intercept)` fSEASONSummer fSEASONAutumn fSEASONWinter DENSITYLow
<dbl> <dbl> <dbl> <dbl> <dbl>
1 2.49 1.43 0.354 -0.629 -0.000214
2 2.50 1.89 0.626 -1.01 -0.175
3 2.15 1.41 0.855 -0.181 0.218
4 2.03 1.44 1.13 -0.513 0.464
5 2.24 1.83 0.894 -0.397 -0.190
6 2.03 2.41 1.00 -0.630 0.460
7 2.14 1.66 0.791 -0.526 0.368
8 1.90 2.06 1.05 -0.0997 0.542
9 2.53 1.49 0.282 -1.02 -0.171
10 2.32 1.06 0.765 -0.601 0.525
# ℹ 1,790 more rows
# ℹ 4 more variables: `fSEASONSummer:DENSITYLow` <dbl>,
# `fSEASONAutumn:DENSITYLow` <dbl>, `fSEASONWinter:DENSITYLow` <dbl>,
# reciprocal_dispersion <dbl>
The summary() method generates simple summaries (posterior means, standard errors and 95% credible intervals, along with Rhat and effective sample size diagnostics).
Family: negbinomial
Links: mu = log; shape = identity
Formula: RECRUITS ~ fSEASON * DENSITY
Data: quinn (Number of observations: 42)
Draws: 3 chains, each with iter = 5000; warmup = 2500; thin = 5;
total post-warmup draws = 1500
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
Intercept 2.42 0.19 2.05 2.79 1.00 1518
fSEASONSummer 1.40 0.25 0.88 1.89 1.00 1467
fSEASONAutumn 0.55 0.25 0.03 1.05 1.00 1493
fSEASONWinter -0.68 0.29 -1.22 -0.09 1.00 1591
DENSITYLow -0.05 0.27 -0.56 0.44 1.00 1540
fSEASONSummer:DENSITYLow -0.65 0.35 -1.32 0.04 1.00 1486
fSEASONAutumn:DENSITYLow 0.01 0.37 -0.72 0.73 1.00 1455
fSEASONWinter:DENSITYLow -0.61 0.48 -1.58 0.32 1.00 1535
Tail_ESS
Intercept 1501
fSEASONSummer 1542
fSEASONAutumn 1500
fSEASONWinter 1461
DENSITYLow 1409
fSEASONSummer:DENSITYLow 1459
fSEASONAutumn:DENSITYLow 1314
fSEASONWinter:DENSITYLow 1478
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 6.78 3.18 2.98 14.93 1.00 1538 1482
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Conclusions:
- the intercept represents the estimated mean of the first combination of Season (Spring) and Density (High). On the link scale this is 2.42
- the difference between Low and High adult density in spring is -0.05, although this is not significant
- the difference between Spring and Summer for High adult density is 1.4
- the difference between Spring and Autumn for High adult density is 0.55
- the difference between Spring and Winter for High adult density is -0.68
- if there were no interactions, we would expect the Low density Summer recruitment to be the additive of the main effects (Low and Summer). However, the modelled mean is 0.65 less than the additive effects would have expected. This value is significantly different to 0, indicating that there is evidence that the density effect in Summer is different to that in Spring.
- the density effect in Autumn and Winter were not found to be significantly different from what you would expect from an additive model.
## HPD-interval summary of the underlying stanfit via broom.mixed::tidyMCMC:
## posterior medians, HPD credible intervals, Rhat and effective sample sizes.
quinn.brmsNB$fit |>
tidyMCMC(
estimate.method = "median",
conf.int = TRUE,
conf.method = "HPDinterval",
rhat = TRUE,
ess = TRUE
)# A tibble: 11 × 7
term estimate std.error conf.low conf.high rhat ess
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 b_Intercept 2.42 0.193 2.05 2.79 0.999 1510
2 b_fSEASONSummer 1.40 0.252 0.911 1.91 0.998 1459
3 b_fSEASONAutumn 0.546 0.254 0.0318 1.05 1.00 1482
4 b_fSEASONWinter -0.679 0.288 -1.22 -0.0905 0.998 1590
5 b_DENSITYLow -0.0524 0.267 -0.562 0.444 1.000 1534
6 b_fSEASONSummer:DENSITYLow -0.652 0.352 -1.34 -0.00730 1.00 1401
7 b_fSEASONAutumn:DENSITYLow 0.00867 0.372 -0.739 0.705 1.000 1441
8 b_fSEASONWinter:DENSITYLow -0.612 0.485 -1.57 0.338 0.999 1527
9 shape 6.78 3.18 2.37 12.7 0.998 1517
10 Intercept 2.64 0.0803 2.51 2.82 0.999 1472
11 lprior -16.4 0.838 -18.1 -14.9 0.999 1524
Conclusions:
see above
# A draws_df: 500 iterations, 3 chains, and 12 variables
b_Intercept b_fSEASONSummer b_fSEASONAutumn b_fSEASONWinter b_DENSITYLow
1 2.4 1.3 0.546 -0.3856 -0.229
2 2.6 1.3 -0.052 -0.6875 -0.320
3 2.4 1.3 0.540 -0.6144 0.048
4 2.4 1.3 0.579 -0.8332 -0.040
5 2.3 1.6 0.307 -1.0020 0.040
6 2.4 1.7 0.460 -0.0025 -0.299
7 2.6 1.3 0.500 -0.6922 -0.048
8 2.3 1.3 0.505 -0.7135 0.095
9 2.3 1.5 0.774 -0.4278 0.133
10 2.2 1.7 0.889 -0.5055 0.256
b_fSEASONSummer:DENSITYLow b_fSEASONAutumn:DENSITYLow
1 -0.28 0.35
2 -0.57 0.28
3 -0.54 -0.12
4 -0.40 -0.13
5 -0.78 0.62
6 -0.83 0.66
7 -0.71 0.27
8 -0.42 -0.17
9 -1.22 -0.24
10 -1.15 0.14
b_fSEASONWinter:DENSITYLow
1 -1.147
2 -1.071
3 0.075
4 -0.756
5 -0.300
6 -1.295
7 -0.325
8 -0.933
9 -0.514
10 -1.114
# ... with 1490 more draws, and 4 more variables
# ... hidden reserved variables {'.chain', '.iteration', '.draw'}
quinn.brmsNB |>
as_draws_df() |>
summarise_draws(
median,
~ HDInterval::hdi(.x),
rhat,
ess_bulk, ess_tail
)# A tibble: 12 × 7
variable median lower upper rhat ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 b_Intercept 2.41e+0 2.05e+0 2.79e+0 0.999 1518. 1501.
2 b_fSEASONSummer 1.40e+0 9.11e-1 1.91e+0 0.999 1467. 1542.
3 b_fSEASONAutumn 5.45e-1 3.18e-2 1.05e+0 1.00 1493. 1500.
4 b_fSEASONWinter -6.80e-1 -1.22e+0 -9.05e-2 1.00 1591. 1461.
5 b_DENSITYLow -4.55e-2 -5.62e-1 4.44e-1 1.000 1540. 1409.
6 b_fSEASONSummer:DENSITYLow -6.63e-1 -1.34e+0 -7.30e-3 1.00 1485. 1459.
7 b_fSEASONAutumn:DENSITYLow 5.09e-3 -7.39e-1 7.05e-1 1.000 1455. 1314.
8 b_fSEASONWinter:DENSITYLow -6.18e-1 -1.57e+0 3.38e-1 0.999 1534. 1478.
9 shape 6.13e+0 2.37e+0 1.27e+1 1.00 1538. 1482.
10 Intercept 2.64e+0 2.51e+0 2.82e+0 1.000 1479. 1543.
11 lprior -1.64e+1 -1.81e+1 -1.49e+1 0.999 1566. 1386.
12 lp__ -1.57e+2 -1.62e+2 -1.53e+2 1.00 1366. 1330.
quinn.brmsNB |>
as_draws_df() |>
exp() |>
summarise_draws(
median,
HDInterval::hdi,
rhat,
length,
ess_bulk, ess_tail
)# A tibble: 12 × 8
variable median lower upper rhat length ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 b_Intercept 1.11e+ 1 7.69e+ 0 1.61e+ 1 0.999 1500 1518. 1501.
2 b_fSEASONSummer 4.07e+ 0 2.29e+ 0 6.40e+ 0 0.999 1500 1467. 1542.
3 b_fSEASONAutumn 1.73e+ 0 9.24e- 1 2.68e+ 0 1.00 1500 1493. 1500.
4 b_fSEASONWinter 5.07e- 1 2.55e- 1 8.53e- 1 1.00 1500 1591. 1461.
5 b_DENSITYLow 9.55e- 1 5.56e- 1 1.52e+ 0 1.000 1500 1540. 1409.
6 b_fSEASONSummer:DE… 5.15e- 1 2.42e- 1 9.65e- 1 1.00 1500 1485. 1459.
7 b_fSEASONAutumn:DE… 1.01e+ 0 4.17e- 1 1.92e+ 0 1.000 1500 1455. 1314.
8 b_fSEASONWinter:DE… 5.39e- 1 1.42e- 1 1.21e+ 0 0.999 1500 1534. 1478.
9 shape 4.62e+ 2 5.96e+ 0 3.18e+ 5 1.00 1500 1538. 1482.
10 Intercept 1.40e+ 1 1.18e+ 1 1.63e+ 1 1.000 1500 1479. 1543.
11 lprior 7.86e- 8 1.31e- 9 2.45e- 7 0.999 1500 1566. 1386.
12 lp__ 9.48e-69 5.47e-75 1.53e-67 1.00 1500 1366. 1330.
Due to the presence of a log transform in the predictor, it is better to use the regex version.
[1] "b_Intercept" "b_fSEASONSummer"
[3] "b_fSEASONAutumn" "b_fSEASONWinter"
[5] "b_DENSITYLow" "b_fSEASONSummer:DENSITYLow"
[7] "b_fSEASONAutumn:DENSITYLow" "b_fSEASONWinter:DENSITYLow"
[9] "shape" "Intercept"
[11] "lprior" "lp__"
[13] "accept_stat__" "stepsize__"
[15] "treedepth__" "n_leapfrog__"
[17] "divergent__" "energy__"
## Long-format posterior draws of the brms fixed effects (regex-selected).
quinn.draw <- quinn.brmsNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE)
quinn.draw# A tibble: 10,500 × 5
# Groups: .variable [7]
.chain .iteration .draw .variable .value
<int> <int> <int> <chr> <dbl>
1 1 1 1 b_fSEASONSummer 1.33
2 1 2 2 b_fSEASONSummer 1.29
3 1 3 3 b_fSEASONSummer 1.35
4 1 4 4 b_fSEASONSummer 1.27
5 1 5 5 b_fSEASONSummer 1.63
6 1 6 6 b_fSEASONSummer 1.74
7 1 7 7 b_fSEASONSummer 1.27
8 1 8 8 b_fSEASONSummer 1.30
9 1 9 9 b_fSEASONSummer 1.52
10 1 10 10 b_fSEASONSummer 1.73
# ℹ 10,490 more rows
quinn.brmsNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
mutate(.value = exp(.value)) |>
summarise_draws(
median,
~ HDInterval::hdi(.x, credMass = 0.95),
rhat,
length,
ess_bulk, ess_tail
)# A tibble: 7 × 9
# Groups: .variable [7]
.variable variable median lower upper rhat length ess_bulk ess_tail
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 b_DENSITYLow .value 0.955 0.556 1.52 1.000 1500 1540. 1409.
2 b_fSEASONAutumn .value 1.73 0.924 2.68 1.00 1500 1493. 1500.
3 b_fSEASONAutumn:DE… .value 1.01 0.417 1.92 1.000 1500 1455. 1314.
4 b_fSEASONSummer .value 4.07 2.29 6.40 0.999 1500 1467. 1542.
5 b_fSEASONSummer:DE… .value 0.515 0.242 0.965 1.00 1500 1485. 1459.
6 b_fSEASONWinter .value 0.507 0.255 0.853 1.00 1500 1591. 1461.
7 b_fSEASONWinter:DE… .value 0.539 0.142 1.21 0.999 1500 1534. 1478.
## Proportion of draws in `x` greater than the reference value `Val`
## (default 0). Equivalent to mean(x > Val), NAs propagate.
exceedP <- function(x, Val = 0) sum(x > Val) / length(x)
quinn.brmsNB |>
tidy_draws() |>
exp() |>
dplyr::select(starts_with("b_")) |>
summarise_draws(
median,
~ HDInterval::hdi(.x, credMass = 0.9),
rhat,
ess_bulk, ess_tail,
~ exceedP(.x, 1)
)# A tibble: 8 × 8
variable median lower upper rhat ess_bulk ess_tail `~exceedP(.x, 1)`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 b_Intercept 11.1 7.68 14.6 1.00 1511. 1493. 1
2 b_fSEASONSummer 4.07 2.58 5.90 0.999 1458. 1524. 1
3 b_fSEASONAutumn 1.73 1.07 2.50 1.000 1486. 1493. 0.979
4 b_fSEASONWinter 0.507 0.295 0.762 1.00 1582. 1450. 0.014
5 b_DENSITYLow 0.955 0.591 1.41 1.00 1531. 1361. 0.433
6 b_fSEASONSummer… 0.515 0.256 0.844 1.00 1350. 1449. 0.03
7 b_fSEASONAutumn… 1.01 0.448 1.67 1.00 1434. 1309. 0.505
8 b_fSEASONWinter… 0.539 0.165 1.02 0.999 1524. 1463. 0.0993
We can then summarise this
# A tibble: 7 × 7
.variable .value .lower .upper .width .point .interval
<chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 b_DENSITYLow -0.0455 -0.562 0.444 0.95 median hdci
2 b_fSEASONAutumn 0.545 0.0318 1.05 0.95 median hdci
3 b_fSEASONAutumn:DENSITYLow 0.00509 -0.719 0.726 0.95 median hdci
4 b_fSEASONSummer 1.40 0.911 1.91 0.95 median hdci
5 b_fSEASONSummer:DENSITYLow -0.663 -1.34 -0.00730 0.95 median hdci
6 b_fSEASONWinter -0.680 -1.22 -0.0905 0.95 median hdci
7 b_fSEASONWinter:DENSITYLow -0.618 -1.57 0.338 0.95 median hdci
We could alternatively express the parameters on the response scale.
# A tibble: 7 × 7
.variable `exp(.value)` .lower .upper .width .point .interval
<chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 b_DENSITYLow 0.955 0.544 1.51 0.95 median hdci
2 b_fSEASONAutumn 1.73 0.948 2.72 0.95 median hdci
3 b_fSEASONAutumn:DENSITYLow 1.01 0.417 1.92 0.95 median hdci
4 b_fSEASONSummer 4.07 2.34 6.45 0.95 median hdci
5 b_fSEASONSummer:DENSITYLow 0.515 0.242 0.965 0.95 median hdci
6 b_fSEASONWinter 0.507 0.267 0.869 0.95 median hdci
7 b_fSEASONWinter:DENSITYLow 0.539 0.142 1.21 0.95 median hdci
## Link-scale posterior slabs for the brms fit, shaded by 50/80/95% CDF
## intervals. `stat()` is deprecated since ggplot2 3.4.0 -> `after_stat()`.
quinn.brmsNB |>
  gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
  ggplot() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  stat_slab(aes(
    x = .value, y = .variable,
    fill = after_stat(ggdist::cut_cdf_qi(cdf,
      .width = c(0.5, 0.8, 0.95),
      labels = scales::percent_format()
    ))
  ), color = "black") +
scale_fill_brewer("Interval", direction = -1, na.translate = FALSE)quinn.brmsNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
ggplot() +
geom_vline(xintercept = 0, linetype = "dashed") +
stat_halfeye(aes(x = .value, y = .variable)) +
theme_classic()We could alternatively express the parameters on the response scale.
quinn.brmsNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
group_by(.variable) |>
mutate(.value = exp(.value)) |>
median_hdci()# A tibble: 7 × 7
.variable .value .lower .upper .width .point .interval
<chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 b_DENSITYLow 0.955 0.544 1.51 0.95 median hdci
2 b_fSEASONAutumn 1.73 0.948 2.72 0.95 median hdci
3 b_fSEASONAutumn:DENSITYLow 1.01 0.417 1.92 0.95 median hdci
4 b_fSEASONSummer 4.07 2.34 6.45 0.95 median hdci
5 b_fSEASONSummer:DENSITYLow 0.515 0.242 0.965 0.95 median hdci
6 b_fSEASONWinter 0.507 0.267 0.869 0.95 median hdci
7 b_fSEASONWinter:DENSITYLow 0.539 0.142 1.21 0.95 median hdci
## Response-scale (exp) posterior slabs for the brms fit; dashed line at 1
## marks "no effect"; x axis on a log2 scale so fold-changes are symmetric.
## `stat()` is deprecated since ggplot2 3.4.0 -> `after_stat()`.
quinn.brmsNB |>
  gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
  mutate(.value = exp(.value)) |>
  ggplot() +
  geom_vline(xintercept = 1, linetype = "dashed") +
  stat_slab(aes(
    x = .value, y = .variable,
    fill = after_stat(ggdist::cut_cdf_qi(cdf,
      .width = c(0.5, 0.8, 0.95),
      labels = scales::percent_format()
    ))
  ), color = "black") +
  scale_x_continuous("", trans = scales::log2_trans(), breaks = unique(as.vector(2^(0:4 %o% c(-1, 1))))) +
  scale_fill_brewer("Interval", direction = -1, na.translate = FALSE) +
theme_classic()Conclusions:
- the estimated effect of Low vs High adult density in Spring is -0.05 (the median of the posterior for this parameter). Back-transformed to the response scale this is 0.96, i.e. essentially no density effect in Spring.
- the estimated effect of Autumn vs Spring (at High density) is 0.55 on the link scale, with 95% credibility intervals of 0.03 to 1.05 - e.g. a (marginally) significant positive effect. Back-transformed, recruitment in Autumn is 1.73 times that in Spring, a 73% increase.
- the estimated interaction effects for Autumn:Low and Summer:Low are 0.01 and -0.65 respectively, which equate to 1.01 and 0.52 fold changes in the density effect relative to Spring.
- Rhat and number of effective samples for each parameter are also provided as MCMC diagnostics and all look good.
## Link scale
## Posterior slabs on the log-link scale, interval-shaded.
## `stat()` is deprecated since ggplot2 3.4.0 -> `after_stat()`.
quinn.brmsNB |>
  gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
  ggplot() +
  stat_slab(aes(
    x = .value, y = .variable,
    fill = after_stat(ggdist::cut_cdf_qi(cdf,
      .width = c(0.5, 0.8, 0.95),
      labels = scales::percent_format()
    ))
  ), color = "black") +
  geom_vline(xintercept = 0, linetype = "dashed") +
scale_fill_brewer("Interval", direction = -1, na.translate = FALSE)## Fractional scale
## Fractional-scale posterior slabs (exp of the draws), dashed line at 1.
## `stat()` is deprecated since ggplot2 3.4.0 -> `after_stat()`.
quinn.brmsNB |>
  gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
  mutate(.value = exp(.value)) |>
  ggplot() +
  stat_slab(aes(
    x = .value, y = .variable,
    fill = after_stat(ggdist::cut_cdf_qi(cdf,
      .width = c(0.5, 0.8, 0.95),
      labels = scales::percent_format()
    ))
  ), color = "black") +
  geom_vline(xintercept = 1, linetype = "dashed") +
  scale_fill_brewer("Interval", direction = -1, na.translate = FALSE) +
scale_x_continuous(trans = scales::log2_trans())quinn.brmsNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
ggplot() +
stat_halfeye(aes(x = .value, y = .variable)) +
facet_wrap(~.variable, scales = "free")quinn.brmsNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
ggplot() +
stat_halfeye(aes(x = .value, y = .variable)) +
geom_vline(xintercept = 0, linetype = "dashed")quinn.brmsNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
ggplot() +
stat_halfeye(aes(x = exp(.value), y = .variable)) +
geom_vline(xintercept = 1, linetype = "dashed") +
scale_x_continuous(trans = scales::log2_trans())quinn.brmsNB |>
gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
ggplot() +
geom_density_ridges(aes(x = .value, y = .variable), alpha = 0.4) +
geom_vline(xintercept = 0, linetype = "dashed")Picking joint bandwidth of 0.0654
## Or on a fractional scale
## Ridge densities of the back-transformed brms effects with 95% quantile
## lines. `stat(x)` is deprecated since ggplot2 3.4.0 -> `after_stat(x)`.
quinn.brmsNB |>
  gather_draws(`.Intercept.*|.*fSEASON.*|.*DENSITY.*`, regex = TRUE) |>
  ggplot() +
  geom_density_ridges_gradient(
    aes(
      x = exp(.value),
      y = .variable,
      fill = after_stat(x)
    ),
    alpha = 0.4, colour = "white",
    quantile_lines = TRUE,
    quantiles = c(0.025, 0.975)
  ) +
  geom_vline(xintercept = 1, linetype = "dashed") +
  scale_x_continuous(trans = scales::log2_trans()) +
scale_fill_viridis_c(option = "C")Picking joint bandwidth of 0.0943
This is purely a graphical depiction of the posteriors.
# A tibble: 1,500 × 21
.chain .iteration .draw b_Intercept b_fSEASONSummer b_fSEASONAutumn
<int> <int> <int> <dbl> <dbl> <dbl>
1 1 1 1 2.40 1.33 0.546
2 1 2 2 2.64 1.29 -0.0525
3 1 3 3 2.40 1.35 0.540
4 1 4 4 2.42 1.27 0.579
5 1 5 5 2.34 1.63 0.307
6 1 6 6 2.35 1.74 0.460
7 1 7 7 2.56 1.27 0.500
8 1 8 8 2.28 1.30 0.505
9 1 9 9 2.29 1.52 0.774
10 1 10 10 2.19 1.73 0.889
# ℹ 1,490 more rows
# ℹ 15 more variables: b_fSEASONWinter <dbl>, b_DENSITYLow <dbl>,
# `b_fSEASONSummer:DENSITYLow` <dbl>, `b_fSEASONAutumn:DENSITYLow` <dbl>,
# `b_fSEASONWinter:DENSITYLow` <dbl>, shape <dbl>, Intercept <dbl>,
# lprior <dbl>, lp__ <dbl>, accept_stat__ <dbl>, stepsize__ <dbl>,
# treedepth__ <dbl>, n_leapfrog__ <dbl>, divergent__ <dbl>, energy__ <dbl>
# A tibble: 1,500 × 10
.chain .iteration .draw b_fSEASONSummer b_fSEASONAutumn b_fSEASONWinter
<int> <int> <int> <dbl> <dbl> <dbl>
1 1 1 1 1.33 0.546 -0.386
2 1 2 2 1.29 -0.0525 -0.688
3 1 3 3 1.35 0.540 -0.614
4 1 4 4 1.27 0.579 -0.833
5 1 5 5 1.63 0.307 -1.00
6 1 6 6 1.74 0.460 -0.00247
7 1 7 7 1.27 0.500 -0.692
8 1 8 8 1.30 0.505 -0.714
9 1 9 9 1.52 0.774 -0.428
10 1 10 10 1.73 0.889 -0.505
# ℹ 1,490 more rows
# ℹ 4 more variables: b_DENSITYLow <dbl>, `b_fSEASONSummer:DENSITYLow` <dbl>,
# `b_fSEASONAutumn:DENSITYLow` <dbl>, `b_fSEASONWinter:DENSITYLow` <dbl>
Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
recommended alternatives.
# A tibble: 1,500 × 12
b_Intercept b_fSEASONSummer b_fSEASONAutumn b_fSEASONWinter b_DENSITYLow
<dbl> <dbl> <dbl> <dbl> <dbl>
1 2.40 1.33 0.546 -0.386 -0.229
2 2.64 1.29 -0.0525 -0.688 -0.320
3 2.40 1.35 0.540 -0.614 0.0483
4 2.42 1.27 0.579 -0.833 -0.0400
5 2.34 1.63 0.307 -1.00 0.0400
6 2.35 1.74 0.460 -0.00247 -0.299
7 2.56 1.27 0.500 -0.692 -0.0485
8 2.28 1.30 0.505 -0.714 0.0955
9 2.29 1.52 0.774 -0.428 0.133
10 2.19 1.73 0.889 -0.505 0.256
# ℹ 1,490 more rows
# ℹ 7 more variables: `b_fSEASONSummer:DENSITYLow` <dbl>,
# `b_fSEASONAutumn:DENSITYLow` <dbl>, `b_fSEASONWinter:DENSITYLow` <dbl>,
# shape <dbl>, Intercept <dbl>, lprior <dbl>, lp__ <dbl>
Region of Practical Equivalence
[1] 0.2754809
Possible multicollinearity between b_fSEASONSummer:DENSITYLow and
b_DENSITYLow (r = 0.72). This might lead to inappropriate results. See
'Details' in '?rope'.
# Proportion of samples inside the ROPE [-0.28, 0.28]:
Parameter | inside ROPE
--------------------------------------
Intercept | 0.00 %
fSEASONSummer | 0.00 %
fSEASONAutumn | 11.80 %
fSEASONWinter | 5.62 %
DENSITYLow | 74.30 %
fSEASONSummer:DENSITYLow | 13.06 %
fSEASONAutumn:DENSITYLow | 58.29 %
fSEASONWinter:DENSITYLow | 21.14 %
Possible multicollinearity between b_fSEASONSummer:DENSITYLow and
b_DENSITYLow (r = 0.72). This might lead to inappropriate results. See
'Details' in '?rope'.
## Or based on fractional scale
## ROPE on exponentiated draws: the interval c(0.755, 1.32) is the
## response-scale equivalent of the default +/-0.28 link-scale ROPE.
quinn.brmsNB |>
as_draws_df("^b_fSEASON.*|^b_DENSITY.*", regex = TRUE) |>
exp() |>
## equivalence_test(range = c(0.755, 1.32))
rope(range = c(0.755, 1.32))# Proportion of samples inside the ROPE [0.76, 1.32]:
Parameter | inside ROPE
--------------------------------------
fSEASONSummer | 0.00 %
fSEASONAutumn | 11.73 %
fSEASONWinter | 5.69 %
DENSITYLow | 74.30 %
fSEASONSummer:DENSITYLow | 13.06 %
fSEASONAutumn:DENSITYLow | 58.01 %
fSEASONWinter:DENSITYLow | 21.14 %
## Keep the exponentiated fixed-effect draws for reuse in the ROPE
## calculations below.
quinn.mcmc <-
quinn.brmsNB |>
as_draws_df("^b_fSEASON.*|^b_DENSITY.*", regex = TRUE) |>
exp()
quinn.mcmc |>
rope(range = c(0.755, 1.32))# Proportion of samples inside the ROPE [0.76, 1.32]:
Parameter | inside ROPE
--------------------------------------
fSEASONSummer | 0.00 %
fSEASONAutumn | 11.73 %
fSEASONWinter | 5.69 %
DENSITYLow | 74.30 %
fSEASONSummer:DENSITYLow | 13.06 %
fSEASONAutumn:DENSITYLow | 58.01 %
fSEASONWinter:DENSITYLow | 21.14 %
## note, the following is not quite correct: it does not get the CI correct
## NOTE(review): plot() is handed the original model via `data =`, while the
## ROPE was computed on exponentiated draws -- verify the displayed HDIs.
quinn.mcmc |>
rope(range = c(0.755, 1.32)) |>
plot(data = quinn.brmsNB)# Test for Practical Equivalence
ROPE: [0.76 1.32]
Parameter | H0 | inside ROPE | 95% HDI
-----------------------------------------------------------------
fSEASONSummer | Rejected | 0.00 % | [2.41, 6.64]
fSEASONAutumn | Undecided | 11.73 % | [1.03, 2.86]
fSEASONWinter | Undecided | 5.69 % | [0.29, 0.91]
DENSITYLow | Undecided | 74.30 % | [0.57, 1.56]
fSEASONSummer:DENSITYLow | Undecided | 13.06 % | [0.27, 1.05]
fSEASONAutumn:DENSITYLow | Undecided | 58.01 % | [0.49, 2.07]
fSEASONWinter:DENSITYLow | Undecided | 21.14 % | [0.21, 1.38]
quinn.brmsNB |> modelsummary(
statistic = c("conf.low", "conf.high"),
shape = term ~ statistic,
exponentiate = TRUE
)Warning:
`modelsummary` uses the `performance` package to extract goodness-of-fit
statistics from models of this class. You can specify the statistics you wish
to compute by supplying a `metrics` argument to `modelsummary`, which will then
push it forward to `performance`. Acceptable values are: "all", "common",
"none", or a character vector of metrics names. For example: `modelsummary(mod,
metrics = c("RMSE", "R2")` Note that some metrics are computationally
expensive. See `?performance::performance` for details.
This warning appears once per session.
| (1) | |||
|---|---|---|---|
| Est. | 2.5 % | 97.5 % | |
| b_Intercept | 11.140 | 7.806 | 16.322 |
| b_fSEASONSummer | 4.074 | 2.414 | 6.645 |
| b_fSEASONAutumn | 1.725 | 1.033 | 2.858 |
| b_fSEASONWinter | 0.507 | 0.295 | 0.912 |
| b_DENSITYLow | 0.955 | 0.570 | 1.557 |
| b_fSEASONSummer × DENSITYLow | 0.515 | 0.268 | 1.046 |
| b_fSEASONAutumn × DENSITYLow | 1.005 | 0.488 | 2.068 |
| b_fSEASONWinter × DENSITYLow | 0.539 | 0.205 | 1.382 |
| Num.Obs. | 42 | ||
| R2 | 0.727 | ||
| ELPD | -148.3 | ||
| ELPD s.e. | 6.0 | ||
| LOOIC | 296.5 | ||
| LOOIC s.e. | 12.0 | ||
| WAIC | 295.5 | ||
| RMSE | 7.47 | ||
11 Further investigations
fSEASON = Spring:
contrast ratio lower.HPD upper.HPD
High / Low 0.873 0.363 1.73
fSEASON = Summer:
contrast ratio lower.HPD upper.HPD
High / Low 2.193 0.907 3.80
fSEASON = Autumn:
contrast ratio lower.HPD upper.HPD
High / Low 1.067 0.365 1.99
fSEASON = Winter:
contrast ratio lower.HPD upper.HPD
High / Low 2.139 0.477 5.68
Point estimate displayed: median
Results are back-transformed from the log scale
HPD interval probability: 0.95
## absolute response scale
## regrid() re-expresses the emmeans reference grid on the response scale
## before contrasting, so pairs() yields absolute High - Low differences
## (in recruit counts) rather than ratios.
quinn.rstanarmNB |>
emmeans(~ DENSITY | fSEASON, type = "link") |>
regrid() |>
pairs()fSEASON = Spring:
contrast estimate lower.HPD upper.HPD
High - Low -1.42 -10.56 7.41
fSEASON = Summer:
contrast estimate lower.HPD upper.HPD
High - Low 26.45 2.40 55.68
fSEASON = Autumn:
contrast estimate lower.HPD upper.HPD
High - Low 1.25 -16.19 17.02
fSEASON = Winter:
contrast estimate lower.HPD upper.HPD
High - Low 2.92 -1.63 7.44
Point estimate displayed: median
HPD interval probability: 0.95
## Full posterior of the High/Low contrast within each season; `Fit` is the
## contrast back-transformed to a ratio (exp of the log-scale difference).
quinn.em <- quinn.rstanarmNB |>
emmeans(~ DENSITY | fSEASON, type = "link") |>
pairs() |>
gather_emmeans_draws() |>
mutate(Fit = exp(.value))
head(quinn.em)# A tibble: 6 × 7
# Groups: contrast, fSEASON [1]
contrast fSEASON .chain .iteration .draw .value Fit
<fct> <fct> <int> <int> <int> <dbl> <dbl>
1 High - Low Spring NA NA 1 0.000214 1.00
2 High - Low Spring NA NA 2 0.175 1.19
3 High - Low Spring NA NA 3 -0.218 0.805
4 High - Low Spring NA NA 4 -0.464 0.629
5 High - Low Spring NA NA 5 0.190 1.21
6 High - Low Spring NA NA 6 -0.460 0.631
## Caterpillar plot of the seasonal High/Low effect-size ratios
## (median + HDI), on a log2 x axis centred on the no-effect line at 1.
g2 <- quinn.em |>
group_by(contrast, fSEASON) |>
median_hdci() |>
ggplot() +
geom_vline(xintercept = 1, linetype = "dashed") +
geom_pointrange(aes(x = Fit, y = fSEASON, xmin = Fit.lower, xmax = Fit.upper)) +
scale_x_continuous("Effect size (High/Low)", trans = scales::log2_trans(), breaks = unique(as.vector(2^(0:4 %o% c(-1, 1))))) +
theme_classic()
g2ggplot(quinn.em, aes(x = Fit)) +
geom_histogram() +
geom_vline(xintercept = 1, linetype = "dashed") +
scale_x_continuous("Effect size (High/Low)", trans = scales::log2_trans(), breaks = unique(as.vector(2^(0:4 %o% c(-1, 1))))) +
facet_wrap(fSEASON ~ contrast, scales = "free")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# A tibble: 4 × 8
contrast fSEASON Fit .lower .upper .width .point .interval
<fct> <fct> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 High - Low Spring 0.873 0.363 1.73 0.95 median hdci
2 High - Low Summer 2.19 0.907 3.80 0.95 median hdci
3 High - Low Autumn 1.07 0.435 2.08 0.95 median hdci
4 High - Low Winter 2.14 0.477 5.68 0.95 median hdci
`summarise()` has grouped output by 'contrast'. You can override using the
`.groups` argument.
# A tibble: 4 × 3
# Groups: contrast [1]
contrast fSEASON P
<fct> <fct> <dbl>
1 High - Low Spring 0.368
2 High - Low Summer 0.989
3 High - Low Autumn 0.562
4 High - Low Winter 0.908
## Probability of effect greater than 10%
quinn.em |>
group_by(contrast, fSEASON) |>
summarize(P = mean(Fit > 1.1))`summarise()` has grouped output by 'contrast'. You can override using the
`.groups` argument.
# A tibble: 4 × 3
# Groups: contrast [1]
contrast fSEASON P
<fct> <fct> <dbl>
1 High - Low Spring 0.279
2 High - Low Summer 0.978
3 High - Low Autumn 0.464
4 High - Low Winter 0.882
## Using summarise_draws
## Same contrast summary via posterior::summarise_draws: median, HDI and the
## probability that each High/Low ratio exceeds 1.
quinn.rstanarmNB |>
emmeans(~ DENSITY | fSEASON, type = "link") |>
pairs() |>
tidy_draws() |>
exp() |>
summarise_draws(median,
HDInterval::hdi,
P = ~ mean(.x > 1)
)# A tibble: 4 × 5
variable median lower upper P
<chr> <dbl> <dbl> <dbl> <dbl>
1 contrast High - Low fSEASON Spring 0.873 0.363 1.73 0.368
2 contrast High - Low fSEASON Summer 2.19 0.907 3.80 0.989
3 contrast High - Low fSEASON Autumn 1.07 0.365 1.99 0.562
4 contrast High - Low fSEASON Winter 2.14 0.477 5.68 0.908
## Prediction grid: every SEASON x DENSITY combination, and the matching
## model (design) matrix for manual posterior prediction.
newdata <- with(quinn, expand.grid(
fSEASON = levels(fSEASON),
DENSITY = levels(DENSITY)
))
Xmat <- model.matrix(~ fSEASON * DENSITY, data = newdata)
as.matrix(quinn.rstanarmNB) |> head() parameters
iterations (Intercept) fSEASONSummer fSEASONAutumn fSEASONWinter DENSITYLow
[1,] 2.490577 1.434142 0.3538466 -0.6293523 -0.0002144918
[2,] 2.499496 1.891457 0.6264150 -1.0136037 -0.1747841574
[3,] 2.153970 1.412938 0.8550894 -0.1812812 0.2175113988
[4,] 2.034064 1.444481 1.1304266 -0.5131506 0.4642164636
[5,] 2.241336 1.825574 0.8935352 -0.3970915 -0.1899161606
[6,] 2.027883 2.409375 1.0041219 -0.6304930 0.4598410354
parameters
iterations fSEASONSummer:DENSITYLow fSEASONAutumn:DENSITYLow
[1,] -0.7804621 -0.42276426
[2,] -0.9617139 -0.66812440
[3,] -0.5618188 -0.13707898
[4,] -1.1242590 -0.43123034
[5,] -0.6093922 0.08377498
[6,] -1.9806556 -0.14696733
parameters
iterations fSEASONWinter:DENSITYLow reciprocal_dispersion
[1,] -0.42838551 3.150351
[2,] -0.04236177 3.544500
[3,] -0.75648354 5.109838
[4,] -1.57997721 4.616986
[5,] -1.06043096 5.535041
[6,] -1.41727483 3.167478
## coefs <- as.matrix(quinn.rstanarmNB)
## Posterior draws of the regression coefficients only: drop the
## dispersion parameter, which is not part of the linear predictor.
## select() is an exported dplyr function, so use `::` (not `:::`);
## the outer as.matrix() already coerces, so no second call is needed.
coefs <- as.matrix(as.data.frame(quinn.rstanarmNB) |>
    dplyr::select(-reciprocal_dispersion))
## Expected recruits on the response scale: exp(X %*% beta), one row
## per posterior draw, one column per grid cell.
fit <- exp(coefs %*% t(Xmat))
## Attach posterior medians and HPD intervals to the prediction grid.
newdata <- newdata |>
    cbind(tidyMCMC(fit, conf.int = TRUE, conf.method = "HPDinterval"))
head(newdata) fSEASON DENSITY term estimate std.error conf.low conf.high
1 Spring High 1 10.500333 2.864573 5.771379 16.025759
2 Summer High 2 50.918282 12.407890 30.039708 74.505519
3 Autumn High 3 20.629538 5.180588 12.014755 30.543308
4 Winter High 4 5.901995 1.735567 2.937899 9.350163
5 Spring Low 5 11.972151 3.559382 5.826303 18.470030
6 Summer Low 6 23.133068 5.683352 14.066966 35.074633
## Cell means with 95% HPD intervals; lines connect the seasons within
## each density so the interaction pattern is visible.
ggplot(newdata, aes(y = estimate, x = fSEASON, fill = DENSITY)) +
    geom_blank() +
    ## ymin/ymax are not geom_line aesthetics (ggplot warned and ignored
    ## them) -- the line only needs x, y and linetype.
    geom_line(aes(x = as.numeric(fSEASON), linetype = DENSITY),
        position = position_dodge(0.2)
    ) +
    geom_pointrange(aes(ymin = conf.low, ymax = conf.high),
        shape = 21,
        position = position_dodge(0.2)
    )
# Compare high and low in each season
# via contrasts
newdata <- with(quinn, expand.grid(
    fSEASON = levels(fSEASON),
    DENSITY = levels(DENSITY)
))
## factor differences
Xmat <- model.matrix(~ fSEASON * DENSITY, data = newdata)
Xmat.high <- Xmat[newdata$DENSITY == "High", ]
Xmat.low <- Xmat[newdata$DENSITY == "Low", ]
## Differencing the design rows yields the High - Low contrast on the
## link (log) scale for each season.
Xmat.density <- Xmat.high - Xmat.low
rownames(Xmat.density) <- levels(quinn$fSEASON)
## select() is exported, so `::` (not `:::`) is the correct accessor.
coefs <- as.matrix(as.data.frame(quinn.rstanarmNB) |> dplyr::select(-reciprocal_dispersion))
## exp() of a log-scale difference is a multiplicative effect (ratio).
fit <- exp(coefs %*% t(Xmat.density))
tidyMCMC(fit, conf.int = TRUE, conf.method = "HPDinterval")# A tibble: 4 × 5
term estimate std.error conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl>
1 Spring 0.950 0.378 0.363 1.73
2 Summer 2.33 0.796 0.907 3.80
3 Autumn 1.14 0.449 0.365 1.99
4 Winter 2.52 1.59 0.477 5.68
## or absolute
## Back-transform each cell to the response scale *first*, then take
## the difference: an absolute effect (number of recruits) rather than
## the multiplicative ratio computed above.
fit.high <- coefs %*% t(Xmat.high)
fit.low <- coefs %*% t(Xmat.low)
fit <- exp(fit.high) - exp(fit.low)
# fit = exp(fit.high - fit.low)
tidyMCMC(fit, conf.int = TRUE, conf.method = "HPDinterval")# A tibble: 4 × 5
term estimate std.error conf.low conf.high
<chr> <dbl> <dbl> <dbl> <dbl>
1 1 -1.47 4.64 -10.6 7.41
2 2 27.8 13.6 2.40 55.7
3 3 0.881 7.88 -16.2 17.0
4 4 2.92 2.34 -1.63 7.44
Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
fSEASON = Spring:
contrast ratio lower.HPD upper.HPD
High / Low 1.05 0.604 1.71
fSEASON = Summer:
contrast ratio lower.HPD upper.HPD
High / Low 2.03 1.171 3.13
fSEASON = Autumn:
contrast ratio lower.HPD upper.HPD
High / Low 1.05 0.554 1.75
fSEASON = Winter:
contrast ratio lower.HPD upper.HPD
High / Low 1.94 0.632 4.03
Point estimate displayed: median
Results are back-transformed from the log scale
HPD interval probability: 0.95
## absolute response scale
## regrid() converts the link-scale emmeans to the response scale
## *before* pairs(), so the contrasts are differences in counts
## (High - Low) rather than ratios.
quinn.brmsNB |>
    emmeans(~ DENSITY | fSEASON, type = "link") |>
    regrid() |>
    pairs()Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
fSEASON = Spring:
contrast estimate lower.HPD upper.HPD
High - Low 0.494 -5.797 5.92
fSEASON = Summer:
contrast estimate lower.HPD upper.HPD
High - Low 22.780 7.278 41.46
fSEASON = Autumn:
contrast estimate lower.HPD upper.HPD
High - Low 0.993 -10.406 12.06
fSEASON = Winter:
contrast estimate lower.HPD upper.HPD
High - Low 2.695 -0.839 6.38
Point estimate displayed: median
HPD interval probability: 0.95
## Posterior draws of each High - Low contrast on the link scale;
## Fit = exp(.value) is the High/Low ratio on the response scale.
quinn.em <- quinn.brmsNB |>
    emmeans(~ DENSITY | fSEASON, type = "link") |>
    pairs() |>
    gather_emmeans_draws() |>
    mutate(Fit = exp(.value))Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
# A tibble: 6 × 7
# Groups: contrast, fSEASON [1]
contrast fSEASON .chain .iteration .draw .value Fit
<fct> <fct> <int> <int> <int> <dbl> <dbl>
1 High - Low Spring NA NA 1 0.229 1.26
2 High - Low Spring NA NA 2 0.320 1.38
3 High - Low Spring NA NA 3 -0.0483 0.953
4 High - Low Spring NA NA 4 0.0400 1.04
5 High - Low Spring NA NA 5 -0.0400 0.961
6 High - Low Spring NA NA 6 0.299 1.35
## Caterpillar plot of the High/Low effect sizes: posterior medians with
## 95% HDCI per season, on a log2 x-axis so ratios either side of 1 are
## displayed symmetrically (dashed line = no effect).
g2 <- quinn.em |>
    group_by(contrast, fSEASON) |>
    median_hdci() |>
    ggplot() +
    geom_vline(xintercept = 1, linetype = "dashed") +
    geom_pointrange(aes(x = Fit, y = fSEASON, xmin = Fit.lower, xmax = Fit.upper)) +
    scale_x_continuous("Effect size (High/Low)", trans = scales::log2_trans(), breaks = unique(as.vector(2^(0:4 %o% c(-1, 1))))) +
    theme_classic()
g2ggplot(quinn.em, aes(x = Fit)) +
geom_histogram() +
geom_vline(xintercept = 1, linetype = "dashed") +
scale_x_continuous("Effect size (High/Low)", trans = scales::log2_trans(), breaks = unique(as.vector(2^(0:4 %o% c(-1, 1))))) +
facet_wrap(fSEASON ~ contrast, scales = "free")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# A tibble: 4 × 8
contrast fSEASON Fit .lower .upper .width .point .interval
<fct> <fct> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 High - Low Spring 1.05 0.586 1.69 0.95 median hdci
2 High - Low Summer 2.03 1.17 3.13 0.95 median hdci
3 High - Low Autumn 1.05 0.554 1.75 0.95 median hdci
4 High - Low Winter 1.94 0.727 4.13 0.95 median hdci
`summarise()` has grouped output by 'contrast'. You can override using the
`.groups` argument.
# A tibble: 4 × 3
# Groups: contrast [1]
contrast fSEASON P
<fct> <fct> <dbl>
1 High - Low Spring 0.567
2 High - Low Summer 0.998
3 High - Low Autumn 0.569
4 High - Low Winter 0.941
## Probability of effect greater than 10%
## (posterior probability that the High/Low ratio exceeds 1.1)
quinn.em |>
    group_by(contrast, fSEASON) |>
    summarize(P = mean(Fit > 1.1), .groups = "drop")
# A tibble: 4 × 3
# Groups: contrast [1]
contrast fSEASON P
<fct> <fct> <dbl>
1 High - Low Spring 0.427
2 High - Low Summer 0.993
3 High - Low Autumn 0.439
4 High - Low Winter 0.902
12 Summary figures
## Cell means on the response scale (season within density), coerced to
## a data frame for plotting.
newdata <- quinn.rstanarmNB |>
    emmeans(~ fSEASON | DENSITY, type = "response") |>
    as.data.frame()
head(newdata) fSEASON DENSITY prob lower.HPD upper.HPD
Spring High 10.12416 5.771379 16.02576
Summer High 49.33369 30.039708 74.50552
Autumn High 19.92536 12.014755 30.54331
Winter High 5.70499 2.937899 9.35016
Spring Low 11.42190 5.826303 18.47003
Summer Low 22.25835 14.066966 35.07463
Point estimate displayed: median
Results are back-transformed from the log scale
HPD interval probability: 0.95
## Summary figure: posterior medians with 95% HPD intervals for each
## season x density cell, densities dodged side by side.
g1 <- ggplot(newdata, aes(y = prob, x = fSEASON, color = DENSITY)) +
    geom_pointrange(aes(ymin = lower.HPD, ymax = upper.HPD),
        position = position_dodge(width = 0.2)
    ) +
    theme_classic()
g1 + g2Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
fSEASON DENSITY prob lower.HPD upper.HPD
Spring High 11.14010 7.68563 16.07623
Summer High 45.51770 32.22511 62.87018
Autumn High 19.27494 12.67378 26.33399
Winter High 5.66943 3.37469 8.55707
Spring Low 10.65443 6.91709 15.56148
Summer Low 22.27792 14.84972 31.60323
Point estimate displayed: median
Results are back-transformed from the log scale
HPD interval probability: 0.95
g1 <- ggplot(newdata, aes(y = prob, x = fSEASON, color = DENSITY)) +
geom_pointrange(aes(ymin = lower.HPD, ymax = upper.HPD),
position = position_dodge(width = 0.2)
) +
theme_classic()
g1 + g213 Observation-level random effects
13.1 brms
## Observation-level random effect: every row needs its OWN unique
## level so that (1 | Obs) absorbs extra-Poisson variation per
## observation. Creating Obs inside group_by(fSEASON, DENSITY) would
## recycle levels 1..n across groups, aliasing different observations
## onto the same random effect -- so build it ungrouped.
quinn <- quinn |>
    ungroup() |>
    mutate(Obs = factor(seq_len(n())))
quinn.form <- bf(RECRUITS ~ fSEASON * DENSITY + (1 | Obs), family = poisson(link = "log"))
get_prior(quinn.form, data = quinn) prior class coef group resp dpar
(flat) b
(flat) b DENSITYLow
(flat) b fSEASONAutumn
(flat) b fSEASONAutumn:DENSITYLow
(flat) b fSEASONSummer
(flat) b fSEASONSummer:DENSITYLow
(flat) b fSEASONWinter
(flat) b fSEASONWinter:DENSITYLow
student_t(3, 2.6, 2.5) Intercept
student_t(3, 0, 2.5) sd
student_t(3, 0, 2.5) sd Obs
student_t(3, 0, 2.5) sd Intercept Obs
nlpar lb ub source
default
(vectorized)
(vectorized)
(vectorized)
(vectorized)
(vectorized)
(vectorized)
(vectorized)
default
0 default
0 (vectorized)
0 (vectorized)
## Fit the Poisson model with the observation-level random effect.
quinn.brmsU <- brm(quinn.form,
    data = quinn,
    refresh = 0, # suppress per-iteration progress output
    chains = 3,
    iter = 5000, # total iterations per chain (incl. warmup)
    thin = 5, # keep every 5th draw
    warmup = 2000
)Compiling Stan program...
Start sampling
Warning: Argument 'nsamples' is deprecated. Please use argument 'ndraws'
instead.
## Simulated residual diagnostics via DHARMa.
## NOTE(review): `preds` is created outside this chunk -- presumably
## posterior predictive draws with draws in rows (hence the t() and the
## column-wise medians); confirm against the code that builds it.
quinn.resids <- createDHARMa(
    simulatedResponse = t(preds),
    observedResponse = quinn$RECRUITS,
    fittedPredictedResponse = apply(preds, 2, median),
    integerResponse = TRUE # RECRUITS are counts
)
plot(quinn.resids)Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
DENSITY = High:
fSEASON rate lower.HPD upper.HPD
Spring 9.69610 6.29455 13.65600
Summer 47.00333 35.50918 61.04182
Autumn 19.09840 13.26078 25.61348
Winter 5.48218 3.42612 7.98261
DENSITY = Low:
fSEASON rate lower.HPD upper.HPD
Spring 10.79419 7.14426 15.30824
Summer 21.41149 15.02870 28.45182
Autumn 16.49729 11.05601 22.27960
Winter 2.39549 0.89291 4.50543
Point estimate displayed: median
Results are back-transformed from the log scale
HPD interval probability: 0.95
## Summary figure for the OLRE model: posterior medians and 95% HPD
## intervals for each season x density cell.
ggplot(newdata, aes(y = rate, x = fSEASON, color = DENSITY)) +
    geom_pointrange(aes(ymin = lower.HPD, ymax = upper.HPD),
        position = position_dodge(width = 0.2)
    )